set.seed(1) # to return the same result in the following chunks

1 Introduction and learning objectives

2 Load data

There are two sets of data: i) training data, which contains the actual (realised) sale prices, and ii) out-of-sample data, which contains only the asking prices. Load both data sets.

Make sure you understand what information each column contains. Note that not all information provided might be useful in predicting house prices, but do not make any assumptions before you decide what information you use in your prediction algorithms.

#read in the data
# - training set: 2019 London transactions with the realised sale price (`price`)
# - out-of-sample set: listings with an `asking_price` but no sale price
london_house_prices_2019_training<-read.csv("training_data_assignment_with_prices.csv")
london_house_prices_2019_out_of_sample<-read.csv("test_data_assignment.csv")



#fix data types in both data sets

#fix dates: parse the character date column into Date objects
london_house_prices_2019_training <- london_house_prices_2019_training %>% mutate(date=as.Date(date))
london_house_prices_2019_out_of_sample<-london_house_prices_2019_out_of_sample %>% mutate(date=as.Date(date))
#change characters to factors
# across(where(...)) replaces the superseded mutate_if(); the result is identical
london_house_prices_2019_training <- london_house_prices_2019_training %>%
  mutate(across(where(is.character), as.factor))
london_house_prices_2019_out_of_sample <- london_house_prices_2019_out_of_sample %>%
  mutate(across(where(is.character), as.factor))

#take a quick look at what's in the data
# str() prints one row per column: its type, factor levels and first few values
str(london_house_prices_2019_training)
## 'data.frame':    13998 obs. of  37 variables:
##  $ ID                          : int  2 3 4 5 7 8 9 10 11 12 ...
##  $ date                        : Date, format: "2019-11-01" "2019-08-08" ...
##  $ postcode                    : Factor w/ 12635 levels "BR1 1AB","BR1 1LR",..: 10897 11027 11264 2031 11241 11066 421 9594 9444 873 ...
##  $ property_type               : Factor w/ 4 levels "D","F","S","T": 2 2 3 2 3 2 1 4 4 2 ...
##  $ whether_old_or_new          : Factor w/ 2 levels "N","Y": 1 1 1 1 1 1 1 1 1 1 ...
##  $ freehold_or_leasehold       : Factor w/ 2 levels "F","L": 2 2 1 2 1 2 1 1 1 2 ...
##  $ address1                    : Factor w/ 2825 levels "1","1 - 2","1 - 3",..: 2503 792 253 789 569 234 264 418 5 274 ...
##  $ address2                    : Factor w/ 434 levels "1","10","101",..: 372 NA NA NA NA NA NA NA NA NA ...
##  $ address3                    : Factor w/ 8543 levels "ABBERTON WALK",..: 6990 6821 3715 2492 4168 2879 3620 5251 6045 6892 ...
##  $ town                        : Factor w/ 133 levels "ABBEY WOOD","ACTON",..: NA NA NA 78 NA NA NA NA NA NA ...
##  $ local_aut                   : Factor w/ 69 levels "ASHFORD","BARKING",..: 36 46 24 36 24 46 65 36 36 17 ...
##  $ county                      : Factor w/ 33 levels "BARKING AND DAGENHAM",..: 22 27 18 25 18 27 5 27 32 8 ...
##  $ postcode_short              : Factor w/ 247 levels "BR1","BR2","BR3",..: 190 194 198 28 198 194 4 169 167 8 ...
##  $ current_energy_rating       : Factor w/ 6 levels "B","C","D","E",..: 4 3 3 4 3 2 4 3 4 2 ...
##  $ total_floor_area            : num  30 50 100 39 88 101 136 148 186 65 ...
##  $ number_habitable_rooms      : int  2 2 5 2 4 4 6 6 6 3 ...
##  $ co2_emissions_current       : num  2.3 3 3.7 2.8 3.9 3.1 8.1 5.6 10 1.5 ...
##  $ co2_emissions_potential     : num  1.7 1.7 1.5 1.1 1.4 1.4 4.1 2 6.1 1.5 ...
##  $ energy_consumption_current  : int  463 313 212 374 251 175 339 216 308 128 ...
##  $ energy_consumption_potential: int  344 175 82 144 90 77 168 75 186 128 ...
##  $ windows_energy_eff          : Factor w/ 5 levels "Average","Good",..: 1 1 1 5 1 1 1 1 5 1 ...
##  $ tenure                      : Factor w/ 3 levels "owner-occupied",..: 1 2 1 2 1 1 1 2 1 1 ...
##  $ latitude                    : num  51.5 51.5 51.5 51.6 51.5 ...
##  $ longitude                   : num  -0.1229 -0.2828 -0.4315 0.0423 -0.4293 ...
##  $ population                  : int  34 75 83 211 73 51 25 91 60 97 ...
##  $ altitude                    : int  8 9 25 11 21 11 95 7 7 106 ...
##  $ london_zone                 : int  1 3 5 3 6 6 3 2 2 3 ...
##  $ nearest_station             : Factor w/ 592 levels "abbey road","abbey wood",..: 478 358 235 319 180 502 566 30 32 566 ...
##  $ water_company               : Factor w/ 5 levels "Affinity Water",..: 5 5 1 5 1 5 5 5 5 5 ...
##  $ average_income              : int  57200 61900 50600 45400 49000 56200 57200 65600 50400 52300 ...
##  $ district                    : Factor w/ 33 levels "Barking and Dagenham",..: 22 27 18 26 18 27 5 27 32 8 ...
##  $ price                       : num  360000 408500 499950 259999 395000 ...
##  $ type_of_closest_station     : Factor w/ 3 levels "light_rail","rail",..: 3 2 3 1 3 2 1 3 1 1 ...
##  $ num_tube_lines              : int  1 0 1 0 1 0 0 2 0 0 ...
##  $ num_rail_lines              : int  0 1 1 0 1 1 0 0 1 0 ...
##  $ num_light_rail_lines        : int  0 0 0 1 0 0 1 0 1 1 ...
##  $ distance_to_station         : num  0.528 0.77 0.853 0.29 1.073 ...
# same structural overview for the out-of-sample set, for comparison with training
str(london_house_prices_2019_out_of_sample)
## 'data.frame':    1999 obs. of  37 variables:
##  $ ID                          : int  14434 12562 8866 10721 1057 1527 13961 12108 9363 1155 ...
##  $ date                        : Date, format: NA NA ...
##  $ postcode                    : logi  NA NA NA NA NA NA ...
##  $ property_type               : Factor w/ 4 levels "D","F","S","T": 1 2 2 3 4 3 2 3 2 4 ...
##  $ whether_old_or_new          : Factor w/ 2 levels "N","Y": 1 1 1 1 1 1 1 1 1 1 ...
##  $ freehold_or_leasehold       : Factor w/ 2 levels "F","L": 1 2 2 1 1 1 2 1 2 1 ...
##  $ address1                    : logi  NA NA NA NA NA NA ...
##  $ address2                    : logi  NA NA NA NA NA NA ...
##  $ address3                    : logi  NA NA NA NA NA NA ...
##  $ town                        : Factor w/ 54 levels "ACTON","ADDISCOMBE",..: NA NA NA NA NA NA NA NA NA NA ...
##  $ local_aut                   : logi  NA NA NA NA NA NA ...
##  $ county                      : logi  NA NA NA NA NA NA ...
##  $ postcode_short              : Factor w/ 221 levels "BR1","BR2","BR3",..: 82 50 37 52 214 150 159 115 175 126 ...
##  $ current_energy_rating       : Factor w/ 6 levels "B","C","D","E",..: 3 2 3 3 4 4 4 3 4 3 ...
##  $ total_floor_area            : num  150 59 58 74 97.3 ...
##  $ number_habitable_rooms      : int  6 2 2 5 5 5 5 4 2 5 ...
##  $ co2_emissions_current       : num  7.3 1.5 2.8 3.5 6.5 4.9 5.1 2.9 4.2 4.3 ...
##  $ co2_emissions_potential     : num  2.4 1.4 1.2 1.2 5.7 1.6 3 0.8 3.2 2.5 ...
##  $ energy_consumption_current  : int  274 142 253 256 303 309 240 224 458 253 ...
##  $ energy_consumption_potential: int  89 136 110 80 266 101 140 58 357 143 ...
##  $ windows_energy_eff          : Factor w/ 5 levels "Average","Good",..: 1 1 1 1 1 1 3 1 3 1 ...
##  $ tenure                      : Factor w/ 3 levels "owner-occupied",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ latitude                    : num  51.6 51.6 51.5 51.6 51.5 ...
##  $ longitude                   : num  -0.129 -0.2966 -0.0328 -0.3744 -0.2576 ...
##  $ population                  : int  87 79 23 73 100 24 22 49 65 98 ...
##  $ altitude                    : int  63 38 17 39 8 46 26 16 14 18 ...
##  $ london_zone                 : int  4 4 2 5 2 4 3 6 1 3 ...
##  $ nearest_station             : Factor w/ 494 levels "abbey wood","acton central",..: 16 454 181 302 431 142 20 434 122 212 ...
##  $ water_company               : Factor w/ 4 levels "Affinity Water",..: 4 1 4 1 4 4 4 2 4 4 ...
##  $ average_income              : int  61300 48900 46200 52200 60700 59600 64000 48100 56600 53500 ...
##  $ district                    : Factor w/ 32 levels "Barking and Dagenham",..: 9 4 29 14 17 10 31 15 19 22 ...
##  $ type_of_closest_station     : Factor w/ 3 levels "light_rail","rail",..: 3 3 1 2 3 2 3 3 3 2 ...
##  $ num_tube_lines              : int  1 2 0 0 2 0 1 1 2 0 ...
##  $ num_rail_lines              : int  0 1 0 1 0 1 1 0 0 1 ...
##  $ num_light_rail_lines        : int  0 1 1 0 0 0 0 1 0 0 ...
##  $ distance_to_station         : num  0.839 0.104 0.914 0.766 0.449 ...
##  $ asking_price                : num  750000 229000 152000 379000 930000 350000 688000 386000 534000 459000 ...

Additional data features will be added as below. The two chosen features are school_number and crime_number. The school data is from 2016. The crime number is the total crime as of year 2020.

# number of school data source https://data.gov.uk/dataset/6b776872-c786-4960-af1d-dab521aa4ab0/london-schools-atlas

education <- read_sf(here("data/All_schools_shp/school_data_london_Atlas_2016.shp"))

# Count schools per borough (LA_NAME). Drop the geometry column FIRST:
# group_by()/summarise() on an sf object would also union each borough's
# point geometries, which is expensive and unused here. The counts are
# identical either way, and st_drop_geometry() replaces the original
# as.data.frame() + select(-geometry) round trip.
borough_education <- education %>%
  st_drop_geometry() %>%
  group_by(LA_NAME) %>%
  summarise(school_number = n())

# training add education feature
training_education <- london_house_prices_2019_training %>%
  left_join(borough_education, by=c("district" = "LA_NAME"))

# out-of-sample add education feature
testing_education <- london_house_prices_2019_out_of_sample %>%
  left_join(borough_education, by=c("district" = "LA_NAME"))
#crime data source https://data.london.gov.uk/dataset/recorded_crime_summary

crime <- read_csv(here("data/MPS Borough Level Crime (most recent 24 months).csv"))

# columns 5:16 hold the monthly counts for Jan-Dec 2020; give them stable names
colnames(crime)[5:16] <- paste0("crime2020", sprintf("%02d", 1:12))

# Total 2020 crime per borough. rowSums() over the twelve monthly columns
# replaces the original cbind() + hard-coded `colnames(...)[28]` round trip,
# which silently broke if the source file gained or lost a column.
borough_crime <- crime %>%
  mutate(crime_number = rowSums(across(crime202001:crime202012))) %>%
  group_by(LookUp_BoroughName) %>%
  summarise(crime_number = sum(crime_number))

# training add crime feature
# join borough-level crime totals onto each property, then drop rows where
# the join failed (crime_number NA) or population is missing
london_house_prices_2019_training <- training_education %>%
  left_join(borough_crime, by=c("district" = "LookUp_BoroughName")) %>% 
  filter(!is.na(crime_number)) %>% 
  filter(!is.na(population))

# training set scale the useful numeric data
# NOTE: scale(center=FALSE, scale=TRUE) divides each column by its
# root-mean-square (not its standard deviation, since no centering is done)
features_needs_scaling <- london_house_prices_2019_training %>% 
  select(total_floor_area, co2_emissions_current, energy_consumption_current, latitude, longitude, london_zone, average_income, num_light_rail_lines, num_rail_lines, num_tube_lines, distance_to_station, school_number, crime_number)
features_needs_scaling <- scale(features_needs_scaling, center=FALSE, scale=TRUE)
# NOTE(review): the out-of-sample chunk later rescales with factors computed
# from the out-of-sample data itself, so the two sets are not on an identical
# scale (train/test inconsistency). Confirm this is intended; otherwise reuse
# attr(features_needs_scaling, "scaled:scale") from this training fit there.
temp <- london_house_prices_2019_training %>% 
  select(-colnames(features_needs_scaling))
london_house_prices_2019_training <- as.data.frame(cbind(temp,features_needs_scaling))

# out-of-sample set add crime feature
# same join/filter pipeline as the training set, applied to the test listings
london_house_prices_2019_out_of_sample <- testing_education %>%
  left_join(borough_crime, by=c("district" = "LookUp_BoroughName")) %>% 
  filter(!is.na(crime_number)) %>% 
  filter(!is.na(population))

# out-of-sample set scale the useful numeric data
# NOTE(review): the scaling factors here are computed from the out-of-sample
# data itself rather than reused from the training fit, so train and test are
# not on an identical scale — confirm this is intended.
features_needs_scaling <- london_house_prices_2019_out_of_sample %>% 
  select(total_floor_area, co2_emissions_current, energy_consumption_current, latitude, longitude, london_zone, average_income, num_light_rail_lines, num_rail_lines, num_tube_lines, distance_to_station, school_number, crime_number)
features_needs_scaling <- scale(features_needs_scaling, center=FALSE, scale=TRUE)
temp <- london_house_prices_2019_out_of_sample %>% 
  select(-colnames(features_needs_scaling))
london_house_prices_2019_out_of_sample <- as.data.frame(cbind(temp,features_needs_scaling))
# summary statistics (skimr): per-column completeness, distribution and histograms
skim(london_house_prices_2019_training)
Data summary
Name london_house_prices_2019_…
Number of rows 13923
Number of columns 39
_______________________
Column type frequency:
character 1
Date 1
factor 17
numeric 20
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
district 0 1 5 22 0 32 0

Variable type: Date

skim_variable n_missing complete_rate min max median n_unique
date 0 1 2019-01-02 2019-12-30 2019-07-22 262

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
postcode 0 1.00 FALSE 12566 E11: 5, BR1: 4, CR0: 4, E17: 4
property_type 0 1.00 FALSE 4 F: 5337, T: 4993, S: 2777, D: 816
whether_old_or_new 0 1.00 FALSE 2 N: 13916, Y: 7
freehold_or_leasehold 0 1.00 FALSE 2 F: 8446, L: 5477
address1 0 1.00 FALSE 2787 3: 220, 7: 211, 4: 204, 12: 197
address2 10797 0.22 FALSE 425 FLA: 216, FLA: 213, FLA: 211, FLA: 159
address3 0 1.00 FALSE 8508 LON: 25, GRE: 24, THE: 23, MAN: 21
town 13309 0.04 FALSE 133 WAL: 35, CHE: 33, STR: 27, CHI: 24
local_aut 0 1.00 FALSE 69 LON: 7462, ROM: 394, BRO: 274, CRO: 241
county 0 1.00 FALSE 33 BRO: 856, CRO: 723, WAN: 696, HAV: 670
postcode_short 0 1.00 FALSE 244 CR0: 241, SW1: 202, E17: 192, SW1: 187
current_energy_rating 0 1.00 FALSE 6 D: 7066, C: 3452, E: 2638, F: 359
windows_energy_eff 0 1.00 FALSE 5 Ave: 7794, Goo: 3199, Ver: 1696, Poo: 1229
tenure 0 1.00 FALSE 3 own: 11195, ren: 2471, ren: 257
nearest_station 0 1.00 FALSE 591 rom: 199, cha: 102, bex: 98, har: 96
water_company 0 1.00 FALSE 5 Tha: 10410, Aff: 1573, Ess: 1149, SES: 787
type_of_closest_station 0 1.00 FALSE 3 rai: 6482, tub: 4681, lig: 2760

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
ID 0 1 8002.24 4622.93 2.00 4000.50 8017.00 12020.50 1.5996e+04 ▇▇▇▇▇
number_habitable_rooms 0 1 4.31 1.66 1.00 3.00 4.00 5.00 1.4000e+01 ▆▇▁▁▁
co2_emissions_potential 0 1 2.22 1.44 -0.50 1.30 1.80 2.70 2.0500e+01 ▇▁▁▁▁
energy_consumption_potential 0 1 141.26 77.68 -49.00 89.00 122.00 167.00 9.2900e+02 ▇▃▁▁▁
population 0 1 83.74 43.70 1.00 52.00 79.00 109.00 5.1000e+02 ▇▃▁▁▁
altitude 0 1 36.59 25.99 0.00 16.00 32.00 51.00 2.3900e+02 ▇▃▁▁▁
price 0 1 594106.58 519788.78 77000.00 351250.00 460000.00 650000.00 1.0800e+07 ▇▁▁▁▁
total_floor_area 0 1 0.90 0.44 0.20 0.62 0.81 1.05 4.6600e+00 ▇▂▁▁▁
co2_emissions_current 0 1 0.87 0.49 0.02 0.55 0.78 1.07 9.0100e+00 ▇▁▁▁▁
energy_consumption_current 0 1 0.94 0.33 0.04 0.72 0.89 1.10 4.6400e+00 ▇▅▁▁▁
latitude 0 1 1.00 0.00 1.00 1.00 1.00 1.00 1.0000e+00 ▂▇▇▇▂
longitude 0 1 -0.55 0.83 -2.56 -1.08 -0.56 0.01 1.5300e+00 ▂▅▇▅▂
london_zone 0 1 0.93 0.36 0.25 0.75 0.99 1.24 1.7400e+00 ▇▇▇▆▅
average_income 0 1 0.99 0.15 0.64 0.88 0.98 1.08 1.5200e+00 ▃▇▆▂▁
num_light_rail_lines 0 1 0.49 0.87 0.00 0.00 0.00 0.00 2.0500e+00 ▇▁▁▁▂
num_rail_lines 0 1 0.75 0.66 0.00 0.00 1.30 1.30 2.6000e+00 ▆▁▇▁▁
num_tube_lines 0 1 0.52 0.85 0.00 0.00 0.00 1.17 7.0500e+00 ▇▁▁▁▁
distance_to_station 0 1 0.85 0.53 0.00 0.49 0.74 1.09 7.3200e+00 ▇▁▁▁▁
school_number 0 1 0.97 0.24 0.57 0.85 0.93 1.11 1.5200e+00 ▃▇▆▃▂
crime_number 0 1 0.96 0.29 0.45 0.71 0.99 1.16 1.9200e+00 ▆▆▇▁▁
# same summary for the out-of-sample set, to compare completeness with training
skim(london_house_prices_2019_out_of_sample)
Data summary
Name london_house_prices_2019_…
Number of rows 1992
Number of columns 39
_______________________
Column type frequency:
character 1
Date 1
factor 11
logical 6
numeric 20
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
district 0 1 5 22 0 32 0

Variable type: Date

skim_variable n_missing complete_rate min max median n_unique
date 1992 0 NA NA NA 0

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
property_type 0 1.00 FALSE 4 F: 778, T: 688, S: 395, D: 131
whether_old_or_new 0 1.00 FALSE 2 N: 1991, Y: 1
freehold_or_leasehold 0 1.00 FALSE 2 F: 1209, L: 783
town 1909 0.04 FALSE 54 CHE: 7, WAL: 7, BLA: 3, FOR: 3
postcode_short 0 1.00 FALSE 221 SW1: 40, CR0: 39, E17: 37, SW1: 28
current_energy_rating 0 1.00 FALSE 6 D: 979, C: 511, E: 398, F: 52
windows_energy_eff 0 1.00 FALSE 5 Ave: 1117, Goo: 470, Ver: 232, Poo: 172
tenure 0 1.00 FALSE 3 own: 1638, ren: 319, ren: 35
nearest_station 0 1.00 FALSE 493 orp: 22, sid: 22, rom: 20, bex: 17
water_company 0 1.00 FALSE 4 Tha: 1491, Aff: 236, Ess: 148, SES: 117
type_of_closest_station 0 1.00 FALSE 3 rai: 969, tub: 659, lig: 364

Variable type: logical

skim_variable n_missing complete_rate mean count
postcode 1992 0 NaN :
address1 1992 0 NaN :
address2 1992 0 NaN :
address3 1992 0 NaN :
local_aut 1992 0 NaN :
county 1992 0 NaN :

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
ID 0 1 7966.98 4589.37 1.00 3955.00 7914.00 11825.00 1.5997e+04 ▇▇▇▇▇
number_habitable_rooms 0 1 4.38 1.76 1.00 3.00 4.00 5.00 1.2000e+01 ▇▇▃▁▁
co2_emissions_potential 0 1 2.30 1.55 0.10 1.30 1.90 2.80 1.6000e+01 ▇▂▁▁▁
energy_consumption_potential 0 1 143.91 79.13 1.00 92.00 123.00 169.25 7.3200e+02 ▇▃▁▁▁
population 0 1 83.06 44.35 4.00 52.00 78.00 107.00 6.2000e+02 ▇▂▁▁▁
altitude 0 1 36.77 26.08 1.00 17.00 31.00 50.00 1.7500e+02 ▇▅▁▁▁
asking_price 0 1 627401.61 677878.92 83000.00 350000.00 474000.00 670000.00 1.6513e+07 ▇▁▁▁▁
total_floor_area 0 1 0.89 0.46 0.23 0.59 0.77 1.04 4.3900e+00 ▇▂▁▁▁
co2_emissions_current 0 1 0.86 0.51 0.10 0.53 0.75 1.05 4.3500e+00 ▇▃▁▁▁
energy_consumption_current 0 1 0.94 0.34 0.22 0.71 0.89 1.10 3.0800e+00 ▅▇▁▁▁
latitude 0 1 1.00 0.00 1.00 1.00 1.00 1.00 1.0000e+00 ▂▇▇▇▂
longitude 0 1 -0.56 0.83 -2.47 -1.08 -0.58 0.00 1.3800e+00 ▂▆▇▅▂
london_zone 0 1 0.94 0.35 0.25 0.74 0.99 1.24 1.4800e+00 ▁▇▁▅▆
average_income 0 1 0.99 0.15 0.65 0.89 0.97 1.08 1.5200e+00 ▃▇▆▂▁
num_light_rail_lines 0 1 0.47 0.88 0.00 0.00 0.00 0.00 2.1200e+00 ▇▁▁▁▂
num_rail_lines 0 1 0.76 0.65 0.00 0.00 1.29 1.29 2.5800e+00 ▆▁▇▁▁
num_tube_lines 0 1 0.51 0.86 0.00 0.00 0.00 1.15 6.9200e+00 ▇▁▁▁▁
distance_to_station 0 1 0.86 0.51 0.01 0.50 0.76 1.10 4.2000e+00 ▇▅▁▁▁
school_number 0 1 0.97 0.24 0.57 0.85 0.93 1.10 1.5200e+00 ▃▇▆▃▂
crime_number 0 1 0.95 0.30 0.45 0.67 0.98 1.14 1.9200e+00 ▆▇▇▁▁

From the skim result, we can see that although some of the variables have a high complete rate in the training set, the complete rate of the corresponding variables in the out-of-sample set is close to 0. For example, postcode is complete in the training set but empty in the out-of-sample set. This means that in the following model building, this variable should not be used for training; instead, postcode_short should be used, since it has a high complete rate in both the training and out-of-sample sets.

set.seed(1) # to return the same result in the following chunks

#let's do the initial split
# 75/25 random split (rsample); the held-out 25% is used below to compare models
train_test_split <- initial_split(london_house_prices_2019_training, prop = 0.75) #training set contains 75% of the data
# Create the training dataset
train_data <- training(train_test_split)
test_data <- testing(train_test_split)

3 Visualize data

Visualize and examine the data. What plots could be useful here? What do you learn from these visualizations?

# Density of raw sale prices: reveals the heavy right tail that motivates
# the log transformation used in the next plot.
london_house_prices_2019_training %>%
  ggplot(aes(x = price)) +
  geom_density() +
  theme_minimal() +
  labs(
    title = "Price is extremely right-skewed",
    subtitle = "Density plot of price",
    x = "Price",
    y = "Density"
  )

We can see from the density plot that the price is mostly concentrated at low value with high-value outliers. Therefore, log transformation will be used to decrease the impact of high-value outliers, and thus making it easier to visualize the distribution of prices.

# Box plots of log(price) by property type; the log scale tames the outliers
# so the four distributions are comparable on one axis.
p1 <- london_house_prices_2019_training %>%
  ggplot(aes(x = log(price), fill = property_type)) +
  geom_boxplot() +
  theme_minimal() +
  labs(
    title = "Outliers exist mostly on right tail for most property types with exception of type F",
    subtitle = "Boxplot of log house prices",
    x = "Log price",
    fill = "Property type"
  )

p1

From the boxplot we can see that different property types have different distributions of log price, as illustrated above. We can see that F, which stands for flat, has relatively low prices compared to other types. It also has more lower-end outliers, meaning that some flats (e.g., basement flats) have significantly low prices. Type D, which stands for detached, has on average higher prices than the other property types. This is intuitive, since detached houses are generally higher class and more luxurious. We can also see that S, which stands for semi-detached houses, has the smallest interquartile range.

# Top-10 stations by median nearby sale price. The median (not the mean) is
# used because price is heavily right-skewed.
# Note: the original chunk computed yrmon = as.yearmon(date) here, but that
# column was immediately discarded by summarise(), so it has been removed.
house_training <- london_house_prices_2019_training %>% 
  group_by(nearest_station) %>% 
  summarise(median_price = median(price)) %>% 
  slice_max(median_price, n = 10) %>% 
  # flag the two stations called out in the narrative below
  mutate(ToHighlight = ifelse(nearest_station %in% c("hyde park corner", "knightsbridge"), "yes", "no"))


p2 <- ggplot(house_training, aes(x = reorder(nearest_station, -median_price), y = median_price, fill = ToHighlight))+
  geom_col()+  # geom_col() is the idiomatic form of geom_bar(stat = "identity")
  # guide = "none" hides the legend; guide = FALSE is deprecated in ggplot2 >= 3.3.4
  scale_fill_manual( values = c( "yes"="tomato", "no"="gray" ), guide = "none" ) +
  theme_minimal() +
  theme(axis.text.x=element_text(angle=45, hjust=1)) +
  labs(title = "House price near Hyde Park Corner and Knightsbridge are the highest",
       subtitle = "Top 10 stations with highest median house price",
       x = "Nearest station",
       y = "Median house price")

p2

Since average housing price is sensitive to outliers, we use median to represent the housing price level near each station. Looking closely, we can see that the first 6 stations are all a stone’s throw to parks (incl., Hyde Park, Green Park, Regent’s Park). We can also see that Hyde Park Corner station has the highest median price, this is probably because of i) the nice environment surrounded by Hyde Park and Buckingham Palace; ii) prime location which is 23 minutes from the central business district (City of London). The second richest neighborhood is Knightsbridge, which is 7 minutes walk from Hyde Park Corner station. Houses around these two stations seem to be significantly higher than other 8 stations in the top 10 stations with highest median house prices.

london_wards_sf <- read_sf(here("data/London-wards-2018_ESRI/London_Ward.shp"))
# reproject the ward polygons to WGS84 (EPSG:4326) so they align with lon/lat data
city_of_london_wgs84 <- london_wards_sf %>% 
  st_transform(4326)

# median sale price per district (median is robust to the price outliers)
price_district <- london_house_prices_2019_training %>% 
  group_by(district) %>% 
  summarise(median_price = median(price))

# Align district names with the shapefile's DISTRICT field. Recoding the
# district column directly replaces the original whole-data-frame
# `df[df == "x"] <- "y"` pattern, which scanned every column and would have
# corrupted any other column that happened to contain the same string.
price_district <- price_district %>%
  mutate(district = recode(district,
    "City and County of the City of London" = "City of London",
    "Westminster" = "City of Westminster"))

# attach prices to polygons; districts with no sales stay NA on the map
price_wards <- city_of_london_wgs84 %>%
  left_join(price_district, by=c("DISTRICT" = "district"))

tmap::tmap_mode("view")  # interactive (leaflet-based) map output
tmap::tm_shape(price_wards) +
  tm_polygons("median_price",
              palette = "Reds",
              title='Median housing price among districts') + 
  tm_layout(title= 'Pricy houses are centered around central-west London', 
            title.position = c('right', 'top'),
            legend.position = c("left","bottom"))

From the graph above we can see that the most pricy houses are concentrated around central and west London. This is the case because west London has more advanced infrastructures, parks and mansions that attract the middle to upper class investors. Note that our dataset does not include any observations in the district of the City of London. Therefore, the model developed in the following is not used to predict prices in the City of London.

Estimate a correlation table between prices and other continuous variables. What do you glean from the correlation table?

# produce a correlation table using GGally::ggcor()

library("GGally")
# ggcorr() silently drops non-numeric columns, so only ID needs removing here
london_house_prices_2019_training %>% 
  select(-ID) %>% #ID is an arbitrary row key; its correlations would be meaningless
  ggcorr(method = c("pairwise", "pearson"), layout.exp = 2,label_round=2, label = TRUE,label_size = 2,hjust = 1,nbreaks = 5,size = 2,angle = -20)+
  labs(title = "Price has highest correlation with total floor area",
       subtitle ="Correlation table of numeric variables")

Correlation matrix between pairs attributed in Table 1 is presented. The coefficient ranges between -1 and 1, where 1 means two attributes have strong and positive association. In contrast, -1 means two attributes have strong and negative associations. And 0 means the pair is perfectly uncorrelated. As shown in Fig. 2, attribute ‘price’ has a strong positive relationship with attributes ‘total floor area’, ‘number of habitable rooms’, ‘co2 emissions potential’, ‘co2 emissions current’, ‘average income’ and ‘number of tube lines’. In contrast, the attribute ‘Price’ has a strong negative relationship with attributes ‘energy consumption potential’, ‘London zone’. Intuitively, the bigger the house is, the higher the price. This can explain the positive correlation between ‘total floor area’ and ‘price’, ‘number of habitable rooms’ and ‘price’. And the richer the neighborhood, the higher the price of the house. This can explain the positive correlation between ‘average income’ and ‘price’. The more central the location is, the higher the price. This can explain the negative correlation between ‘London zone’ and ‘price’. Since energy efficiency is important for long term investment of a house, the lower the energy consumption, the higher the price. This can explain the negative correlation between ‘energy consumption potential’ and ‘price’. Note that unlike ‘energy consumption current’, which has a high negative correlation with ‘price’, there exhibits a high positive correlation between ‘co2 emission current’ and ‘price’. This is the case because co2 emission, unlike energy consumption, is not penalized in monetary terms for private building owners and therefore is not a critical consideration for investors. And co2 emission is highly correlated with how big the house is. Therefore, the higher the co2 emission current, the higher the price.

4 Fit a linear regression model

To help you get started I build a linear regression model below. I chose a subset of the features with no particular goal. You can (and should) add more variables and/or choose variable selection methods if you want.

Note that among these attributes that are highly correlated with attribute ‘Price’, some pairs of them are highly correlated between themselves (e.g., ‘total floor area’ and ‘co2 emission current’, ‘total floor area’ and ‘number of habitable rooms’, ‘co2 emission current’ and ‘co2 emission potential’). In the following linear regression model training steps, only one attribute from each pair should be used as input for each model. This is due to the multicollinearity problem, which can negatively impact the result of linear regression. This is because the key goal of linear regression is to isolate the relationship between each independent variable (e.g., ‘total floor area’) and the dependent variable (i.e., price). However, when independent variables are correlated (e.g., ‘total floor area’ and ‘co2 emission current’), it means change in ‘total floor area’ shifts ‘co2 emission current’. As a result, the model is difficult to estimate the relationship between each independent variable and the dependent variable since these independent variables tend to change together. Therefore, only one attribute of each pair will be selected as the input into the linear regression model. In the following, I will select ‘total floor area’ as input to avoid multicollinearity.

#Define control variables
# 5-fold cross-validation, used for all caret models in this section
control <- trainControl (
    method="cv",
    number=5,
    verboseIter=TRUE) #by setting this to true the model will report its progress after each estimation

#we are going to train the model and report the results using k-fold cross validation
# london_zone*total_floor_area expands to both main effects plus their
# interaction, letting the floor-area effect vary by zone
model1_lm<-train(
  price ~ latitude+longitude+london_zone*total_floor_area+crime_number+school_number+distance_to_station+population+property_type,
  train_data,
  method = "lm",
  trControl = control,
  metric = "RMSE")
## + Fold1: intercept=TRUE 
## - Fold1: intercept=TRUE 
## + Fold2: intercept=TRUE 
## - Fold2: intercept=TRUE 
## + Fold3: intercept=TRUE 
## - Fold3: intercept=TRUE 
## + Fold4: intercept=TRUE 
## - Fold4: intercept=TRUE 
## + Fold5: intercept=TRUE 
## - Fold5: intercept=TRUE 
## Aggregating results
## Fitting final model on full training set
# summary of the results
# coefficient estimates, t-statistics and overall fit of the final refitted model
summary(model1_lm)
## 
## Call:
## lm(formula = .outcome ~ ., data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1956064  -117359    -7756    97398  5812254 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    -1.118e+07  1.836e+06  -6.089 1.17e-09 ***
## latitude                        1.104e+07  1.840e+06   6.002 2.01e-09 ***
## longitude                      -5.210e+04  3.414e+03 -15.261  < 2e-16 ***
## london_zone                     4.753e+05  1.875e+04  25.345  < 2e-16 ***
## total_floor_area                1.710e+06  1.632e+04 104.743  < 2e-16 ***
## crime_number                   -9.002e+04  1.417e+04  -6.353 2.20e-10 ***
## school_number                  -6.931e+04  1.548e+04  -4.476 7.68e-06 ***
## distance_to_station            -2.823e+04  5.854e+03  -4.821 1.45e-06 ***
## population                     -8.848e+02  6.495e+01 -13.623  < 2e-16 ***
## property_typeF                 -1.325e+05  1.550e+04  -8.544  < 2e-16 ***
## property_typeS                 -1.416e+05  1.377e+04 -10.277  < 2e-16 ***
## property_typeT                 -1.766e+05  1.409e+04 -12.531  < 2e-16 ***
## `london_zone:total_floor_area` -1.048e+06  1.761e+04 -59.525  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 282900 on 10429 degrees of freedom
## Multiple R-squared:   0.71,  Adjusted R-squared:  0.7097 
## F-statistic:  2128 on 12 and 10429 DF,  p-value: < 2.2e-16
# variable importance for LR
# for lm models, caret::varImp ranks variables by the absolute t-statistic
importance <- varImp(model1_lm, scale=TRUE)
plot(importance)

4.1 Predict the values in testing and out of sample data

Below I use the predict function to test the performance of the model in testing data and summarize the performance of the linear regression model. How can you measure the quality of your predictions?

set.seed(1) # to return the same result in the following chunks

# We can predict the testing values

predictions <- predict(model1_lm,test_data)

# held-out (25% test split) accuracy of the linear model
lr_results<-data.frame(  RMSE = RMSE(predictions, test_data$price), 
                            Rsquare = R2(predictions, test_data$price))

                            
lr_results                         
##       RMSE   Rsquare
## 1 291282.9 0.6657187
#We can predict prices for out of sample data the same way
# the model only uses columns populated in the out-of-sample set, so
# predict() works despite the empty address/postcode fields there
predictions_oos <- predict(model1_lm,london_house_prices_2019_out_of_sample)

We can see from the result that Root Mean Square Error (RMSE) is the loss function we try to minimize. R-squared reached 66.6%, meaning that 66.6% of the variance in the dependent variable, price, can be explained by the chosen independent variables collectively.

5 Fit a tree model

Next I fit a tree model using the same subset of features. Again you can (and should) add more variables and tune the parameter of your tree to find a better fit.

Compare the performance of the linear regression model with the tree model; which one performs better? Why do you think that is the case?

set.seed(1) # to return the same result in the following chunks

colnames_out_of_sample <- colnames(london_house_prices_2019_out_of_sample)

# pre-process train data for better tree prediction:
# keep only predictors that also exist in the out-of-sample set, and drop
# columns that are empty there (addresses, postcode, ...) or are row keys.
# all_of() makes the external-vector selection explicit — bare character
# vectors inside select() are deprecated in tidyselect.
train_data_tree <- train_data %>% 
  select(price,
         all_of(colnames_out_of_sample[colnames_out_of_sample != "asking_price"]),
         -date, -address1, -address2, -address3, -town, -postcode, -ID,
         -local_aut, -county) %>% 
  na.omit()

# 5-fold CV. classProbs was removed: it only applies to classification
# models and triggers a caret warning (and is ignored) for regression.
trctrl <- trainControl(method = "cv", 
                       number = 5)

# a single cp value, so no tuning actually happens; widen this grid to tune
Grid <- expand.grid(cp = 0.01)

model2_tree <- train(
  price ~ latitude+longitude+london_zone+total_floor_area+crime_number+school_number+distance_to_station+property_type,
  train_data_tree,
  method = "rpart",
  metric="RMSE",
  trControl=trctrl,
  tuneGrid=Grid) 

#You can view how the tree performs
# cross-validated RMSE / R-squared / MAE (and their SDs) at the chosen cp
model2_tree$results
##     cp     RMSE  Rsquared      MAE   RMSESD RsquaredSD    MAESD
## 1 0.01 294609.8 0.6900071 167609.5 25274.21 0.04352829 9144.067
#You can view the final tree
# plot of the fitted rpart regression tree
rpart.plot(model2_tree$finalModel)

#you can also visualize the variable importance
importance <- varImp(model2_tree, scale=TRUE)
plot(importance)

test_data_tree <- test_data %>% 
  select(price, colnames_out_of_sample[colnames_out_of_sample != "asking_price"], -date, -address1,-address2,-address3, -town, -postcode, -ID, -local_aut, -county) %>% 
  na.omit()


predictions <- predict(model2_tree,test_data_tree)

tree_results<-data.frame(  RMSE = RMSE(predictions, test_data_tree$price), 
                            Rsquare = R2(predictions, test_data_tree$price))

                            
tree_results                         
##       RMSE   Rsquare
## 1 298042.6 0.6519898
#We can predict prices for out of sample data the same way
predictions_oos <- predict(model2_tree,london_house_prices_2019_out_of_sample)
predictions_oos
##         1         2         3         4         5         6         7         8 
##  683984.8  356419.4  356419.4  456465.4  715016.5  456465.4  683984.8  456465.4 
##         9        10        11        12        13        14        15        16 
##  356419.4  456465.4  456465.4 1368039.7  356419.4  683984.8  356419.4  456465.4 
##        17        18        19        20        21        22        23        24 
##  356419.4  356419.4 1118283.2  715016.5  456465.4  356419.4  356419.4 1368039.7 
##        25        26        27        28        29        30        31        32 
##  356419.4  683984.8  356419.4  456465.4  715016.5  683984.8  356419.4  456465.4 
##        33        34        35        36        37        38        39        40 
##  715016.5  683984.8  683984.8  356419.4  356419.4  356419.4  683984.8  456465.4 
##        41        42        43        44        45        46        47        48 
##  356419.4  683984.8  683984.8  356419.4  456465.4  356419.4  356419.4  456465.4 
##        49        50        51        52        53        54        55        56 
##  356419.4  356419.4  456465.4  456465.4  356419.4  356419.4  356419.4  683984.8 
##        57        58        59        60        61        62        63        64 
##  456465.4  356419.4  683984.8  356419.4 2014519.0  356419.4  356419.4  683984.8 
##        65        66        67        68        69        70        71        72 
##  356419.4  356419.4  456465.4 1240699.1  683984.8  356419.4  356419.4  456465.4 
##        73        74        75        76        77        78        79        80 
##  456465.4  456465.4  456465.4  356419.4  356419.4  356419.4  715016.5  356419.4 
##        81        82        83        84        85        86        87        88 
##  356419.4  456465.4  456465.4  356419.4 2014519.0 3816872.0  356419.4  356419.4 
##        89        90        91        92        93        94        95        96 
##  683984.8  456465.4  356419.4  683984.8  356419.4 2014519.0  356419.4  356419.4 
##        97        98        99       100       101       102       103       104 
##  356419.4  356419.4  356419.4  356419.4  356419.4  356419.4  683984.8  356419.4 
##       105       106       107       108       109       110       111       112 
##  356419.4  356419.4  356419.4  683984.8  456465.4  356419.4  456465.4  456465.4 
##       113       114       115       116       117       118       119       120 
##  456465.4  356419.4  356419.4 2014519.0  456465.4  683984.8  356419.4  456465.4 
##       121       122       123       124       125       126       127       128 
##  456465.4  456465.4  456465.4  715016.5  356419.4  456465.4  683984.8  683984.8 
##       129       130       131       132       133       134       135       136 
##  356419.4  356419.4  356419.4  356419.4  683984.8  356419.4  356419.4 1240699.1 
##       137       138       139       140       141       142       143       144 
##  356419.4  456465.4  683984.8  715016.5  356419.4  683984.8  356419.4  683984.8 
##       145       146       147       148       149       150       151       152 
##  715016.5 1368039.7  356419.4  356419.4  356419.4  456465.4  683984.8  456465.4 
##       153       154       155       156       157       158       159       160 
##  356419.4  356419.4  456465.4  356419.4  456465.4  683984.8  356419.4  715016.5 
##       161       162       163       164       165       166       167       168 
##  456465.4  356419.4  356419.4  456465.4  456465.4 1368039.7 1240699.1  683984.8 
##       169       170       171       172       173       174       175       176 
##  456465.4  356419.4  456465.4 2014519.0 1118283.2  356419.4  356419.4 2014519.0 
##       177       178       179       180       181       182       183       184 
##  356419.4 1240699.1  356419.4  456465.4  456465.4  456465.4 1368039.7  456465.4 
##       185       186       187       188       189       190       191       192 
##  456465.4  456465.4 1240699.1  456465.4  356419.4  456465.4  456465.4  456465.4 
##       193       194       195       196       197       198       199       200 
##  356419.4  356419.4  456465.4  356419.4  356419.4  683984.8  456465.4  356419.4 
##       201       202       203       204       205       206       207       208 
##  456465.4  683984.8  456465.4  356419.4  456465.4  356419.4  456465.4  456465.4 
##       209       210       211       212       213       214       215       216 
##  356419.4  356419.4  356419.4  456465.4  683984.8  456465.4  456465.4  456465.4 
##       217       218       219       220       221       222       223       224 
##  356419.4  456465.4  356419.4  715016.5  356419.4  356419.4  356419.4  715016.5 
##       225       226       227       228       229       230       231       232 
##  356419.4  456465.4  456465.4  456465.4  356419.4  715016.5  456465.4  456465.4 
##       233       234       235       236       237       238       239       240 
##  356419.4  356419.4 1240699.1 1118283.2  456465.4  356419.4  683984.8  356419.4 
##       241       242       243       244       245       246       247       248 
##  456465.4  456465.4  456465.4  715016.5  683984.8  356419.4  356419.4  456465.4 
##       249       250       251       252       253       254       255       256 
## 3399206.7 1240699.1  456465.4 1240699.1  683984.8  456465.4  683984.8  456465.4 
##       257       258       259       260       261       262       263       264 
##  456465.4 2014519.0  356419.4  715016.5 1118283.2  683984.8  715016.5  683984.8 
##       265       266       267       268       269       270       271       272 
##  683984.8  356419.4  356419.4  356419.4 1368039.7  715016.5  683984.8  456465.4 
##       273       274       275       276       277       278       279       280 
##  356419.4  356419.4  356419.4 1118283.2  456465.4  456465.4 1240699.1  356419.4 
##       281       282       283       284       285       286       287       288 
##  456465.4  356419.4  456465.4  456465.4 1118283.2  356419.4  683984.8  456465.4 
##       289       290       291       292       293       294       295       296 
##  356419.4 1368039.7  356419.4  356419.4 1118283.2  683984.8 1240699.1  356419.4 
##       297       298       299       300       301       302       303       304 
##  356419.4  356419.4  456465.4  356419.4 3816872.0  683984.8  356419.4  456465.4 
##       305       306       307       308       309       310       311       312 
## 1118283.2  456465.4  456465.4  456465.4  356419.4  456465.4  683984.8  456465.4 
##       313       314       315       316       317       318       319       320 
##  456465.4  356419.4  683984.8  683984.8  456465.4 1118283.2  456465.4  356419.4 
##       321       322       323       324       325       326       327       328 
##  715016.5  356419.4  456465.4  356419.4  356419.4  683984.8  356419.4  356419.4 
##       329       330       331       332       333       334       335       336 
##  683984.8  683984.8  356419.4  456465.4  356419.4  683984.8  356419.4  456465.4 
##       337       338       339       340       341       342       343       344 
##  356419.4  683984.8  356419.4  456465.4  356419.4  356419.4  456465.4 2014519.0 
##       345       346       347       348       349       350       351       352 
##  683984.8 1240699.1  456465.4  715016.5  356419.4  356419.4  683984.8  683984.8 
##       353       354       355       356       357       358       359       360 
##  356419.4  456465.4  356419.4  456465.4  456465.4  356419.4  356419.4  683984.8 
##       361       362       363       364       365       366       367       368 
##  683984.8  456465.4  356419.4  356419.4  456465.4  356419.4  456465.4  715016.5 
##       369       370       371       372       373       374       375       376 
##  683984.8  456465.4  683984.8  683984.8  683984.8  356419.4  683984.8  456465.4 
##       377       378       379       380       381       382       383       384 
##  456465.4  683984.8  456465.4  456465.4  456465.4  356419.4  356419.4 2014519.0 
##       385       386       387       388       389       390       391       392 
##  356419.4  356419.4  683984.8  356419.4  715016.5  683984.8  356419.4  456465.4 
##       393       394       395       396       397       398       399       400 
##  356419.4  683984.8  356419.4  356419.4  356419.4  456465.4  356419.4  356419.4 
##       401       402       403       404       405       406       407       408 
##  715016.5 1118283.2  683984.8 1118283.2  683984.8  456465.4  356419.4  456465.4 
##       409       410       411       412       413       414       415       416 
##  715016.5  356419.4  683984.8  356419.4  683984.8  356419.4  356419.4  356419.4 
##       417       418       419       420       421       422       423       424 
##  456465.4  456465.4  456465.4  356419.4  683984.8  683984.8  356419.4  456465.4 
##       425       426       427       428       429       430       431       432 
##  456465.4  456465.4  456465.4  356419.4  683984.8  456465.4  456465.4  683984.8 
##       433       434       435       436       437       438       439       440 
##  356419.4 1368039.7  456465.4  356419.4  715016.5  715016.5  356419.4  356419.4 
##       441       442       443       444       445       446       447       448 
##  683984.8  356419.4  715016.5  356419.4  356419.4  356419.4  683984.8  683984.8 
##       449       450       451       452       453       454       455       456 
##  683984.8  456465.4  683984.8  456465.4  456465.4 1118283.2  456465.4  356419.4 
##       457       458       459       460       461       462       463       464 
## 2014519.0  356419.4  456465.4 2014519.0 2014519.0  356419.4  356419.4  356419.4 
##       465       466       467       468       469       470       471       472 
##  683984.8  456465.4  456465.4  456465.4  715016.5  683984.8  456465.4  356419.4 
##       473       474       475       476       477       478       479       480 
## 3816872.0  456465.4  356419.4  456465.4  715016.5  683984.8  356419.4  683984.8 
##       481       482       483       484       485       486       487       488 
##  356419.4 1118283.2  356419.4  356419.4  456465.4  456465.4 1118283.2  356419.4 
##       489       490       491       492       493       494       495       496 
##  356419.4 2014519.0 1368039.7  356419.4  356419.4  683984.8  456465.4  683984.8 
##       497       498       499       500       501       502       503       504 
##  356419.4  356419.4 1368039.7  456465.4  456465.4  356419.4  715016.5  456465.4 
##       505       506       507       508       509       510       511       512 
##  456465.4  356419.4  456465.4  683984.8  356419.4  456465.4  356419.4  715016.5 
##       513       514       515       516       517       518       519       520 
## 1240699.1  456465.4  715016.5  456465.4  456465.4  356419.4  456465.4  683984.8 
##       521       522       523       524       525       526       527       528 
##  356419.4  456465.4  356419.4  356419.4  683984.8  356419.4 3816872.0  356419.4 
##       529       530       531       532       533       534       535       536 
##  715016.5  683984.8  683984.8  456465.4  456465.4  456465.4  683984.8  715016.5 
##       537       538       539       540       541       542       543       544 
##  456465.4 1240699.1  356419.4  356419.4  456465.4 1240699.1 1118283.2  456465.4 
##       545       546       547       548       549       550       551       552 
##  456465.4  456465.4  683984.8  356419.4  683984.8 1118283.2  456465.4 2014519.0 
##       553       554       555       556       557       558       559       560 
##  356419.4 1240699.1  356419.4 1118283.2  356419.4  683984.8  456465.4  356419.4 
##       561       562       563       564       565       566       567       568 
##  356419.4  356419.4 1368039.7  456465.4  356419.4 1240699.1 1118283.2  456465.4 
##       569       570       571       572       573       574       575       576 
##  356419.4  456465.4  456465.4  456465.4  683984.8 1240699.1  356419.4  356419.4 
##       577       578       579       580       581       582       583       584 
##  356419.4  683984.8  456465.4  456465.4  356419.4  456465.4  683984.8  356419.4 
##       585       586       587       588       589       590       591       592 
## 1240699.1  356419.4  683984.8  356419.4  356419.4 1240699.1  456465.4  356419.4 
##       593       594       595       596       597       598       599       600 
## 1240699.1  356419.4  456465.4  683984.8  456465.4  456465.4  683984.8  456465.4 
##       601       602       603       604       605       606       607       608 
##  356419.4  683984.8  456465.4  356419.4  683984.8 2014519.0  683984.8  456465.4 
##       609       610       611       612       613       614       615       616 
##  683984.8  456465.4  456465.4  456465.4  715016.5  356419.4 1118283.2  356419.4 
##       617       618       619       620       621       622       623       624 
##  356419.4  456465.4  356419.4  456465.4  715016.5  456465.4  456465.4  683984.8 
##       625       626       627       628       629       630       631       632 
##  456465.4  356419.4  356419.4  456465.4  456465.4  456465.4  456465.4  356419.4 
##       633       634       635       636       637       638       639       640 
##  456465.4 1368039.7  456465.4  356419.4  356419.4  356419.4  683984.8  683984.8 
##       641       642       643       644       645       646       647       648 
##  456465.4  356419.4  356419.4  456465.4  715016.5  356419.4  456465.4  356419.4 
##       649       650       651       652       653       654       655       656 
## 2014519.0 1118283.2  456465.4  356419.4  356419.4  456465.4  456465.4  356419.4 
##       657       658       659       660       661       662       663       664 
## 1240699.1  683984.8  356419.4  456465.4  356419.4 1118283.2  356419.4  356419.4 
##       665       666       667       668       669       670       671       672 
##  683984.8  356419.4  456465.4 2014519.0  456465.4  456465.4  683984.8  715016.5 
##       673       674       675       676       677       678       679       680 
## 1118283.2  683984.8  456465.4  356419.4 2014519.0  356419.4  356419.4  456465.4 
##       681       682       683       684       685       686       687       688 
##  683984.8  356419.4  356419.4  456465.4  456465.4  356419.4  356419.4  456465.4 
##       689       690       691       692       693       694       695       696 
##  456465.4  456465.4  456465.4  356419.4  456465.4  356419.4  456465.4  356419.4 
##       697       698       699       700       701       702       703       704 
##  683984.8  356419.4 1240699.1 2014519.0  456465.4  456465.4  456465.4  356419.4 
##       705       706       707       708       709       710       711       712 
##  456465.4  456465.4  356419.4  356419.4  456465.4  683984.8  356419.4  456465.4 
##       713       714       715       716       717       718       719       720 
##  456465.4  356419.4  456465.4  356419.4  683984.8  456465.4  356419.4  356419.4 
##       721       722       723       724       725       726       727       728 
##  456465.4  456465.4  356419.4  456465.4  456465.4  456465.4  356419.4  356419.4 
##       729       730       731       732       733       734       735       736 
##  356419.4  683984.8  356419.4  356419.4  456465.4  356419.4  356419.4 1118283.2 
##       737       738       739       740       741       742       743       744 
##  456465.4  456465.4  456465.4  356419.4  356419.4  683984.8  356419.4  356419.4 
##       745       746       747       748       749       750       751       752 
##  356419.4  356419.4  356419.4  456465.4 1118283.2 1240699.1 1240699.1  356419.4 
##       753       754       755       756       757       758       759       760 
##  456465.4  456465.4  683984.8  456465.4  356419.4  683984.8  356419.4  456465.4 
##       761       762       763       764       765       766       767       768 
##  683984.8  456465.4  356419.4  356419.4  356419.4 1118283.2  356419.4  683984.8 
##       769       770       771       772       773       774       775       776 
##  715016.5  456465.4  715016.5  356419.4  356419.4  683984.8  356419.4  456465.4 
##       777       778       779       780       781       782       783       784 
## 2014519.0  456465.4  456465.4  356419.4  356419.4  356419.4  356419.4  356419.4 
##       785       786       787       788       789       790       791       792 
## 2014519.0  683984.8  456465.4  456465.4  715016.5  356419.4  456465.4  683984.8 
##       793       794       795       796       797       798       799       800 
##  683984.8 1240699.1  356419.4  356419.4  356419.4  683984.8  456465.4  356419.4 
##       801       802       803       804       805       806       807       808 
## 1118283.2 1240699.1  456465.4  456465.4  683984.8  456465.4 1118283.2  683984.8 
##       809       810       811       812       813       814       815       816 
## 2014519.0  456465.4  715016.5  356419.4  356419.4  356419.4  356419.4  356419.4 
##       817       818       819       820       821       822       823       824 
##  356419.4  683984.8  683984.8  356419.4  683984.8 2014519.0  356419.4  683984.8 
##       825       826       827       828       829       830       831       832 
##  683984.8  356419.4  356419.4  356419.4  683984.8  456465.4  356419.4  456465.4 
##       833       834       835       836       837       838       839       840 
##  356419.4  356419.4  456465.4  456465.4  356419.4  683984.8  356419.4  683984.8 
##       841       842       843       844       845       846       847       848 
##  456465.4  356419.4 1118283.2  456465.4  683984.8  456465.4  683984.8  683984.8 
##       849       850       851       852       853       854       855       856 
## 1118283.2  715016.5  715016.5  456465.4  456465.4  683984.8  456465.4  456465.4 
##       857       858       859       860       861       862       863       864 
##  456465.4  356419.4  683984.8  456465.4  683984.8  456465.4 1240699.1  456465.4 
##       865       866       867       868       869       870       871       872 
##  456465.4  356419.4  715016.5 1240699.1  356419.4  456465.4  683984.8  456465.4 
##       873       874       875       876       877       878       879       880 
##  356419.4  456465.4  356419.4  356419.4  356419.4  683984.8 1368039.7  456465.4 
##       881       882       883       884       885       886       887       888 
##  456465.4 1240699.1  356419.4  356419.4  356419.4  456465.4  356419.4  683984.8 
##       889       890       891       892       893       894       895       896 
##  456465.4  356419.4  683984.8  456465.4  456465.4  715016.5  456465.4  356419.4 
##       897       898       899       900       901       902       903       904 
##  356419.4  356419.4  715016.5  683984.8  715016.5  356419.4  683984.8  356419.4 
##       905       906       907       908       909       910       911       912 
##  356419.4  683984.8  683984.8  356419.4  356419.4  356419.4  456465.4  356419.4 
##       913       914       915       916       917       918       919       920 
##  456465.4  356419.4  715016.5  356419.4  683984.8  456465.4  683984.8  456465.4 
##       921       922       923       924       925       926       927       928 
##  456465.4  715016.5  356419.4  456465.4  356419.4  456465.4  456465.4  683984.8 
##       929       930       931       932       933       934       935       936 
##  456465.4  683984.8  356419.4  356419.4  356419.4  356419.4  356419.4  356419.4 
##       937       938       939       940       941       942       943       944 
##  456465.4  683984.8  356419.4  683984.8  683984.8  356419.4  456465.4  356419.4 
##       945       946       947       948       949       950       951       952 
##  356419.4  356419.4  356419.4  683984.8  683984.8  683984.8  356419.4  356419.4 
##       953       954       955       956       957       958       959       960 
##  683984.8  456465.4  456465.4  356419.4  456465.4  683984.8  456465.4  356419.4 
##       961       962       963       964       965       966       967       968 
##  456465.4  456465.4  683984.8  456465.4  683984.8  456465.4  456465.4  715016.5 
##       969       970       971       972       973       974       975       976 
## 1118283.2 1240699.1  356419.4  456465.4  456465.4  456465.4  456465.4  456465.4 
##       977       978       979       980       981       982       983       984 
##  456465.4  715016.5  456465.4  683984.8  456465.4  356419.4  356419.4  456465.4 
##       985       986       987       988       989       990       991       992 
## 1240699.1  456465.4  456465.4  683984.8  456465.4  356419.4  715016.5  456465.4 
##       993       994       995       996       997       998       999      1000 
##  356419.4 1240699.1  356419.4  456465.4  356419.4  356419.4  683984.8  456465.4 
##      1001      1002      1003      1004      1005      1006      1007      1008 
##  456465.4  683984.8  683984.8  456465.4  456465.4  456465.4  356419.4  356419.4 
##      1009      1010      1011      1012      1013      1014      1015      1016 
##  356419.4  683984.8  356419.4  456465.4 1240699.1  356419.4  356419.4  456465.4 
##      1017      1018      1019      1020      1021      1022      1023      1024 
##  356419.4  456465.4  683984.8  356419.4  356419.4  456465.4  456465.4  456465.4 
##      1025      1026      1027      1028      1029      1030      1031      1032 
##  456465.4  683984.8 1368039.7  356419.4  356419.4 1240699.1  456465.4 1368039.7 
##      1033      1034      1035      1036      1037      1038      1039      1040 
##  456465.4  356419.4  356419.4  456465.4  456465.4  356419.4  683984.8  356419.4 
##      1041      1042      1043      1044      1045      1046      1047      1048 
##  356419.4  356419.4  456465.4  356419.4  356419.4  683984.8  356419.4  356419.4 
##      1049      1050      1051      1052      1053      1054      1055      1056 
##  356419.4  456465.4  683984.8  456465.4  356419.4  715016.5  456465.4 1240699.1 
##      1057      1058      1059      1060      1061      1062      1063      1064 
##  456465.4  456465.4 2014519.0  456465.4 1240699.1  683984.8  356419.4  456465.4 
##      1065      1066      1067      1068      1069      1070      1071      1072 
##  715016.5  715016.5  683984.8 2014519.0  356419.4  456465.4 1118283.2  456465.4 
##      1073      1074      1075      1076      1077      1078      1079      1080 
##  356419.4  456465.4  356419.4  683984.8  456465.4  683984.8  356419.4  356419.4 
##      1081      1082      1083      1084      1085      1086      1087      1088 
##  356419.4  356419.4  456465.4  456465.4  456465.4  683984.8  456465.4  715016.5 
##      1089      1090      1091      1092      1093      1094      1095      1096 
##  683984.8  356419.4  456465.4  683984.8  356419.4  356419.4  683984.8  456465.4 
##      1097      1098      1099      1100      1101      1102      1103      1104 
##  356419.4  456465.4  356419.4  356419.4  456465.4 2014519.0  683984.8  715016.5 
##      1105      1106      1107      1108      1109      1110      1111      1112 
##  356419.4  456465.4  683984.8 2014519.0  356419.4  683984.8  356419.4 2014519.0 
##      1113      1114      1115      1116      1117      1118      1119      1120 
##  683984.8  456465.4  356419.4  715016.5  456465.4  683984.8  456465.4  456465.4 
##      1121      1122      1123      1124      1125      1126      1127      1128 
##  356419.4  456465.4  356419.4  683984.8  683984.8  456465.4  456465.4  456465.4 
##      1129      1130      1131      1132      1133      1134      1135      1136 
##  456465.4  456465.4  456465.4  456465.4  356419.4  456465.4 1240699.1  683984.8 
##      1137      1138      1139      1140      1141      1142      1143      1144 
##  356419.4  683984.8  683984.8  356419.4  456465.4  356419.4  356419.4  356419.4 
##      1145      1146      1147      1148      1149      1150      1151      1152 
##  456465.4 1118283.2  715016.5  683984.8  356419.4  356419.4  356419.4 1240699.1 
##      1153      1154      1155      1156      1157      1158      1159      1160 
##  356419.4  456465.4  356419.4 1240699.1  456465.4  683984.8  456465.4  356419.4 
##      1161      1162      1163      1164      1165      1166      1167      1168 
##  683984.8  683984.8  683984.8  683984.8  356419.4  683984.8  356419.4  456465.4 
##      1169      1170      1171      1172      1173      1174      1175      1176 
##  456465.4  683984.8  715016.5  356419.4  356419.4  356419.4  683984.8  456465.4 
##      1177      1178      1179      1180      1181      1182      1183      1184 
##  356419.4 1240699.1  456465.4  683984.8  683984.8  456465.4  456465.4  715016.5 
##      1185      1186      1187      1188      1189      1190      1191      1192 
##  456465.4  356419.4 1368039.7  356419.4  356419.4  356419.4  683984.8  356419.4 
##      1193      1194      1195      1196      1197      1198      1199      1200 
##  356419.4 1240699.1  456465.4 1368039.7  715016.5  356419.4  456465.4  456465.4 
##      1201      1202      1203      1204      1205      1206      1207      1208 
##  683984.8 1240699.1  356419.4  456465.4  456465.4  356419.4  683984.8  456465.4 
##      1209      1210      1211      1212      1213      1214      1215      1216 
##  356419.4  456465.4  356419.4  683984.8  356419.4  683984.8  356419.4  456465.4 
##      1217      1218      1219      1220      1221      1222      1223      1224 
##  456465.4  715016.5  456465.4  683984.8  456465.4 1240699.1  356419.4  456465.4 
##      1225      1226      1227      1228      1229      1230      1231      1232 
##  456465.4  356419.4  683984.8  683984.8  356419.4  683984.8  456465.4  456465.4 
##      1233      1234      1235      1236      1237      1238      1239      1240 
##  356419.4  356419.4  356419.4  683984.8  456465.4  683984.8  456465.4  356419.4 
##      1241      1242      1243      1244      1245      1246      1247      1248 
## 1240699.1  456465.4  356419.4  683984.8 1240699.1  456465.4  356419.4  683984.8 
##      1249      1250      1251      1252      1253      1254      1255      1256 
##  683984.8  356419.4  456465.4  456465.4  683984.8  456465.4  356419.4 1118283.2 
##      1257      1258      1259      1260      1261      1262      1263      1264 
##  456465.4  715016.5  356419.4  456465.4  356419.4  456465.4  456465.4  683984.8 
##      1265      1266      1267      1268      1269      1270      1271      1272 
##  456465.4  456465.4  356419.4  456465.4  715016.5  356419.4  683984.8  356419.4 
##      1273      1274      1275      1276      1277      1278      1279      1280 
##  456465.4  456465.4  456465.4 2014519.0  456465.4  456465.4  456465.4  456465.4 
##      1281      1282      1283      1284      1285      1286      1287      1288 
##  456465.4  356419.4  356419.4  715016.5  356419.4 2014519.0  456465.4  356419.4 
##      1289      1290      1291      1292      1293      1294      1295      1296 
## 1240699.1  356419.4  356419.4  683984.8  456465.4 1118283.2  683984.8  683984.8 
##      1297      1298      1299      1300      1301      1302      1303      1304 
##  356419.4  356419.4  683984.8 1240699.1  456465.4  356419.4  456465.4  356419.4 
##      1305      1306      1307      1308      1309      1310      1311      1312 
## 1240699.1  683984.8  456465.4  356419.4 1240699.1 3816872.0  356419.4  356419.4 
##      1313      1314      1315      1316      1317      1318      1319      1320 
##  356419.4 1240699.1  356419.4  456465.4 1240699.1  683984.8  356419.4  683984.8 
##      1321      1322      1323      1324      1325      1326      1327      1328 
##  456465.4  456465.4  356419.4  683984.8  683984.8  356419.4  683984.8  456465.4 
##      1329      1330      1331      1332      1333      1334      1335      1336 
##  683984.8  456465.4  356419.4  456465.4  356419.4  683984.8  356419.4  456465.4 
##      1337      1338      1339      1340      1341      1342      1343      1344 
##  356419.4  356419.4 1240699.1  456465.4  715016.5 1240699.1  356419.4 1118283.2 
##      1345      1346      1347      1348      1349      1350      1351      1352 
##  683984.8  683984.8  715016.5  715016.5  715016.5  356419.4  683984.8  356419.4 
##      1353      1354      1355      1356      1357      1358      1359      1360 
##  683984.8  356419.4  356419.4  683984.8  456465.4  683984.8 1368039.7  456465.4 
##      1361      1362      1363      1364      1365      1366      1367      1368 
## 1240699.1  683984.8  456465.4  356419.4  456465.4  356419.4  456465.4  356419.4 
##      1369      1370      1371      1372      1373      1374      1375      1376 
##  456465.4  683984.8  456465.4 1118283.2  356419.4  356419.4  683984.8  356419.4 
##      1377      1378      1379      1380      1381      1382      1383      1384 
##  456465.4  356419.4  456465.4  715016.5  356419.4 1368039.7  356419.4  356419.4 
##      1385      1386      1387      1388      1389      1390      1391      1392 
##  456465.4  356419.4  683984.8  683984.8  356419.4 1240699.1 1368039.7  456465.4 
##      1393      1394      1395      1396      1397      1398      1399      1400 
##  356419.4  356419.4  683984.8  456465.4  356419.4  456465.4  356419.4  683984.8 
##      1401      1402      1403      1404      1405      1406      1407      1408 
##  356419.4  456465.4  456465.4  456465.4  683984.8  356419.4  356419.4  715016.5 
##      1409      1410      1411      1412      1413      1414      1415      1416 
##  456465.4  683984.8  456465.4  356419.4  356419.4  356419.4 1240699.1  456465.4 
##      1417      1418      1419      1420      1421      1422      1423      1424 
##  356419.4 1240699.1 1368039.7  356419.4  456465.4  456465.4  456465.4  456465.4 
##      1425      1426      1427      1428      1429      1430      1431      1432 
##  715016.5  456465.4  683984.8  356419.4  356419.4  356419.4 1240699.1  456465.4 
##      1433      1434      1435      1436      1437      1438      1439      1440 
##  683984.8  456465.4  356419.4  356419.4  356419.4  683984.8  356419.4  683984.8 
##      1441      1442      1443      1444      1445      1446      1447      1448 
##  683984.8  356419.4 3816872.0  356419.4  456465.4  715016.5  715016.5  356419.4 
##      1449      1450      1451      1452      1453      1454      1455      1456 
##  356419.4 1240699.1  356419.4  356419.4  356419.4  456465.4  456465.4  456465.4 
##      1457      1458      1459      1460      1461      1462      1463      1464 
##  356419.4  456465.4  683984.8  356419.4  683984.8  456465.4  356419.4  356419.4 
##      1465      1466      1467      1468      1469      1470      1471      1472 
##  456465.4  356419.4  683984.8  456465.4 1118283.2  456465.4 2014519.0  683984.8 
##      1473      1474      1475      1476      1477      1478      1479      1480 
## 3816872.0  456465.4  715016.5 1118283.2  356419.4  456465.4  456465.4  356419.4 
##      1481      1482      1483      1484      1485      1486      1487      1488 
##  356419.4  356419.4  356419.4  356419.4  715016.5  683984.8  356419.4  715016.5 
##      1489      1490      1491      1492      1493      1494      1495      1496 
##  356419.4  356419.4  683984.8  356419.4  683984.8  456465.4  683984.8  356419.4 
##      1497      1498      1499      1500      1501      1502      1503      1504 
##  356419.4  356419.4  456465.4  356419.4  356419.4  456465.4 1240699.1  356419.4 
##      1505      1506      1507      1508      1509      1510      1511      1512 
##  715016.5  356419.4  356419.4  356419.4  356419.4  715016.5  456465.4  356419.4 
##      1513      1514      1515      1516      1517      1518      1519      1520 
## 1240699.1  456465.4  683984.8  356419.4  456465.4  456465.4  356419.4  456465.4 
##      1521      1522      1523      1524      1525      1526      1527      1528 
##  356419.4 1118283.2  683984.8  683984.8  456465.4  683984.8  456465.4  456465.4 
##      1529      1530      1531      1532      1533      1534      1535      1536 
##  456465.4 1240699.1  683984.8  356419.4  456465.4 2014519.0  456465.4  683984.8 
##      1537      1538      1539      1540      1541      1542      1543      1544 
##  356419.4  715016.5  683984.8  456465.4  356419.4  356419.4  456465.4  456465.4 
##      1545      1546      1547      1548      1549      1550      1551      1552 
##  356419.4  356419.4  683984.8  356419.4  456465.4 1118283.2  356419.4  356419.4 
##      1553      1554      1555      1556      1557      1558      1559      1560 
##  456465.4  356419.4  356419.4  356419.4  715016.5  683984.8  356419.4 2014519.0 
##      1561      1562      1563      1564      1565      1566      1567      1568 
## 1240699.1  683984.8 2014519.0  356419.4  683984.8  456465.4  356419.4  456465.4 
##      1569      1570      1571      1572      1573      1574      1575      1576 
##  456465.4  456465.4  456465.4  356419.4  456465.4  456465.4  356419.4  356419.4 
##      1577      1578      1579      1580      1581      1582      1583      1584 
## 1368039.7  456465.4  456465.4  456465.4  356419.4  715016.5 1240699.1  456465.4 
##      1585      1586      1587      1588      1589      1590      1591      1592 
##  683984.8  456465.4  356419.4  456465.4  456465.4 7939320.0  356419.4 1240699.1 
##      1593      1594      1595      1596      1597      1598      1599      1600 
##  456465.4  683984.8  683984.8  683984.8  356419.4  456465.4  456465.4  456465.4 
##      1601      1602      1603      1604      1605      1606      1607      1608 
##  356419.4  683984.8  356419.4 1240699.1  683984.8  456465.4  456465.4  456465.4 
##      1609      1610      1611      1612      1613      1614      1615      1616 
##  683984.8  456465.4  683984.8  683984.8 1240699.1  356419.4  356419.4 1368039.7 
##      1617      1618      1619      1620      1621      1622      1623      1624 
##  356419.4  683984.8  356419.4  456465.4  683984.8  715016.5 1118283.2  356419.4 
##      1625      1626      1627      1628      1629      1630      1631      1632 
##  356419.4  683984.8  456465.4  456465.4  456465.4  456465.4  456465.4  683984.8 
##      1633      1634      1635      1636      1637      1638      1639      1640 
##  683984.8  683984.8  356419.4  715016.5  456465.4  683984.8  456465.4  356419.4 
##      1641      1642      1643      1644      1645      1646      1647      1648 
##  683984.8  715016.5  356419.4  715016.5  456465.4  456465.4  456465.4  356419.4 
##      1649      1650      1651      1652      1653      1654      1655      1656 
##  683984.8  356419.4  683984.8  683984.8  683984.8  456465.4  356419.4  356419.4 
##      1657      1658      1659      1660      1661      1662      1663      1664 
##  356419.4  683984.8  456465.4  356419.4  456465.4  456465.4  456465.4  683984.8 
##      1665      1666      1667      1668      1669      1670      1671      1672 
##  456465.4  356419.4  356419.4  356419.4  356419.4  683984.8  356419.4  456465.4 
##      1673      1674      1675      1676      1677      1678      1679      1680 
##  356419.4  715016.5  456465.4  715016.5  456465.4  456465.4 1240699.1  356419.4 
##      1681      1682      1683      1684      1685      1686      1687      1688 
##  456465.4  356419.4  683984.8  356419.4 1240699.1 1118283.2  683984.8  456465.4 
##      1689      1690      1691      1692      1693      1694      1695      1696 
##  356419.4  456465.4  356419.4  683984.8  456465.4  356419.4  715016.5  356419.4 
##      1697      1698      1699      1700      1701      1702      1703      1704 
## 2014519.0  356419.4  356419.4  683984.8 2014519.0  456465.4  456465.4 3399206.7 
##      1705      1706      1707      1708      1709      1710      1711      1712 
##  456465.4  456465.4  456465.4  456465.4  715016.5  456465.4  456465.4  683984.8 
##      1713      1714      1715      1716      1717      1718      1719      1720 
##  456465.4  356419.4  356419.4  683984.8  456465.4  356419.4  356419.4  456465.4 
##      1721      1722      1723      1724      1725      1726      1727      1728 
##  456465.4 1118283.2  356419.4  356419.4  456465.4  356419.4  683984.8  456465.4 
##      1729      1730      1731      1732      1733      1734      1735      1736 
## 1368039.7  456465.4  683984.8  683984.8  456465.4  456465.4  356419.4  356419.4 
##      1737      1738      1739      1740      1741      1742      1743      1744 
##  456465.4  456465.4  456465.4  456465.4  456465.4  683984.8  456465.4  356419.4 
##      1745      1746      1747      1748      1749      1750      1751      1752 
##  683984.8  456465.4  356419.4  715016.5  356419.4 1240699.1  456465.4  456465.4 
##      1753      1754      1755      1756      1757      1758      1759      1760 
##  456465.4  456465.4  356419.4  356419.4  356419.4  456465.4  683984.8  683984.8 
##      1761      1762      1763      1764      1765      1766      1767      1768 
##  356419.4  356419.4  456465.4  356419.4  356419.4  456465.4  456465.4  456465.4 
##      1769      1770      1771      1772      1773      1774      1775      1776 
##  356419.4  683984.8  356419.4  356419.4  356419.4  456465.4 1118283.2  456465.4 
##      1777      1778      1779      1780      1781      1782      1783      1784 
##  356419.4  683984.8  356419.4  356419.4  356419.4  715016.5  356419.4  456465.4 
##      1785      1786      1787      1788      1789      1790      1791      1792 
##  456465.4 1118283.2  683984.8  456465.4 2014519.0  356419.4  456465.4  456465.4 
##      1793      1794      1795      1796      1797      1798      1799      1800 
##  456465.4 1118283.2  456465.4  715016.5  456465.4  356419.4  456465.4  683984.8 
##      1801      1802      1803      1804      1805      1806      1807      1808 
##  456465.4  356419.4 1240699.1  356419.4  456465.4  356419.4  683984.8  456465.4 
##      1809      1810      1811      1812      1813      1814      1815      1816 
##  456465.4  456465.4  456465.4  456465.4  715016.5  456465.4  456465.4  356419.4 
##      1817      1818      1819      1820      1821      1822      1823      1824 
##  456465.4  683984.8  356419.4  356419.4  356419.4  356419.4  356419.4 1118283.2 
##      1825      1826      1827      1828      1829      1830      1831      1832 
##  683984.8  356419.4  456465.4  683984.8  456465.4  356419.4  456465.4  356419.4 
##      1833      1834      1835      1836      1837      1838      1839      1840 
##  356419.4 1118283.2  715016.5  456465.4  356419.4  456465.4  356419.4  715016.5 
##      1841      1842      1843      1844      1845      1846      1847      1848 
##  456465.4  456465.4  356419.4  456465.4  683984.8  715016.5  456465.4 1118283.2 
##      1849      1850      1851      1852      1853      1854      1855      1856 
## 3399206.7 2014519.0  715016.5 1240699.1  356419.4 1240699.1  356419.4  683984.8 
##      1857      1858      1859      1860      1861      1862      1863      1864 
## 2014519.0  456465.4  356419.4  715016.5  456465.4  356419.4  456465.4  356419.4 
##      1865      1866      1867      1868      1869      1870      1871      1872 
##  715016.5  715016.5  456465.4  356419.4  356419.4  715016.5 1240699.1  683984.8 
##      1873      1874      1875      1876      1877      1878      1879      1880 
## 1368039.7  356419.4  356419.4  356419.4 1368039.7  356419.4  456465.4  683984.8 
##      1881      1882      1883      1884      1885      1886      1887      1888 
##  456465.4  683984.8  356419.4  715016.5  683984.8 1240699.1  683984.8  456465.4 
##      1889      1890      1891      1892      1893      1894      1895      1896 
##  456465.4  356419.4  683984.8  456465.4  356419.4  356419.4  356419.4  356419.4 
##      1897      1898      1899      1900      1901      1902      1903      1904 
## 1118283.2  456465.4  456465.4  456465.4  456465.4  456465.4  683984.8  683984.8 
##      1905      1906      1907      1908      1909      1910      1911      1912 
##  356419.4  683984.8  356419.4  456465.4  456465.4  456465.4  683984.8  356419.4 
##      1913      1914      1915      1916      1917      1918      1919      1920 
##  356419.4  456465.4  356419.4  715016.5  456465.4  456465.4  456465.4  456465.4 
##      1921      1922      1923      1924      1925      1926      1927      1928 
##  715016.5 1240699.1  683984.8 1118283.2  356419.4  356419.4  356419.4  456465.4 
##      1929      1930      1931      1932      1933      1934      1935      1936 
##  356419.4  683984.8  356419.4  456465.4  356419.4  683984.8  456465.4  683984.8 
##      1937      1938      1939      1940      1941      1942      1943      1944 
##  683984.8  356419.4  456465.4  356419.4  356419.4  356419.4  356419.4  356419.4 
##      1945      1946      1947      1948      1949      1950      1951      1952 
##  356419.4 1240699.1  356419.4  456465.4  683984.8  683984.8  456465.4  356419.4 
##      1953      1954      1955      1956      1957      1958      1959      1960 
##  715016.5  683984.8  715016.5  356419.4  456465.4  683984.8  456465.4  356419.4 
##      1961      1962      1963      1964      1965      1966      1967      1968 
##  356419.4 1118283.2  456465.4  683984.8 1240699.1  356419.4  683984.8  456465.4 
##      1969      1970      1971      1972      1973      1974      1975      1976 
##  456465.4  356419.4  356419.4  356419.4  456465.4  356419.4  356419.4  356419.4 
##      1977      1978      1979      1980      1981      1982      1983      1984 
##  356419.4 1118283.2  356419.4  456465.4  683984.8  356419.4  715016.5  456465.4 
##      1985      1986      1987      1988      1989      1990      1991      1992 
##  715016.5  356419.4  356419.4  456465.4  683984.8  715016.5  715016.5  356419.4

We can see from the result that the RMSE reached 294609.8, which is 1.1% higher than the RMSE obtained from the linear regression (LR) model. This is probably because the tree model predicts a small set of discrete values (one per leaf) rather than an exact price as LR does, so its error is higher. R-squared reached 69.0%, which is 2.5% higher than that of the LR model. This is probably because many of the relationships between the independent variables and the dependent variable are non-linear, and LR fails to detect such non-linear relationships, whereas the tree model is able to capture them; therefore its R-squared is higher.

6 Other algorithms

Use at least two other algorithms to predict prices. Don’t forget to tune the parameters of these algorithms. And then compare the performances of your algorithms to linear regression and trees.

# Tune a K-nearest-neighbours regression of price on location, size,
# neighbourhood and property characteristics. `tuneLength = 10` asks caret
# to evaluate 10 candidate values of k under the shared CV scheme `control`.
set.seed(1)
model3 <- train(
  price ~ latitude + longitude + london_zone + total_floor_area +
    crime_number + school_number + distance_to_station +
    co2_emissions_current + property_type,
  data = train_data,
  method = "knn",
  trControl = control,
  tuneLength = 10
)
## + Fold1: k= 5 
## - Fold1: k= 5 
## + Fold1: k= 7 
## - Fold1: k= 7 
## + Fold1: k= 9 
## - Fold1: k= 9 
## + Fold1: k=11 
## - Fold1: k=11 
## + Fold1: k=13 
## - Fold1: k=13 
## + Fold1: k=15 
## - Fold1: k=15 
## + Fold1: k=17 
## - Fold1: k=17 
## + Fold1: k=19 
## - Fold1: k=19 
## + Fold1: k=21 
## - Fold1: k=21 
## + Fold1: k=23 
## - Fold1: k=23 
## + Fold2: k= 5 
## - Fold2: k= 5 
## + Fold2: k= 7 
## - Fold2: k= 7 
## + Fold2: k= 9 
## - Fold2: k= 9 
## + Fold2: k=11 
## - Fold2: k=11 
## + Fold2: k=13 
## - Fold2: k=13 
## + Fold2: k=15 
## - Fold2: k=15 
## + Fold2: k=17 
## - Fold2: k=17 
## + Fold2: k=19 
## - Fold2: k=19 
## + Fold2: k=21 
## - Fold2: k=21 
## + Fold2: k=23 
## - Fold2: k=23 
## + Fold3: k= 5 
## - Fold3: k= 5 
## + Fold3: k= 7 
## - Fold3: k= 7 
## + Fold3: k= 9 
## - Fold3: k= 9 
## + Fold3: k=11 
## - Fold3: k=11 
## + Fold3: k=13 
## - Fold3: k=13 
## + Fold3: k=15 
## - Fold3: k=15 
## + Fold3: k=17 
## - Fold3: k=17 
## + Fold3: k=19 
## - Fold3: k=19 
## + Fold3: k=21 
## - Fold3: k=21 
## + Fold3: k=23 
## - Fold3: k=23 
## + Fold4: k= 5 
## - Fold4: k= 5 
## + Fold4: k= 7 
## - Fold4: k= 7 
## + Fold4: k= 9 
## - Fold4: k= 9 
## + Fold4: k=11 
## - Fold4: k=11 
## + Fold4: k=13 
## - Fold4: k=13 
## + Fold4: k=15 
## - Fold4: k=15 
## + Fold4: k=17 
## - Fold4: k=17 
## + Fold4: k=19 
## - Fold4: k=19 
## + Fold4: k=21 
## - Fold4: k=21 
## + Fold4: k=23 
## - Fold4: k=23 
## + Fold5: k= 5 
## - Fold5: k= 5 
## + Fold5: k= 7 
## - Fold5: k= 7 
## + Fold5: k= 9 
## - Fold5: k= 9 
## + Fold5: k=11 
## - Fold5: k=11 
## + Fold5: k=13 
## - Fold5: k=13 
## + Fold5: k=15 
## - Fold5: k=15 
## + Fold5: k=17 
## - Fold5: k=17 
## + Fold5: k=19 
## - Fold5: k=19 
## + Fold5: k=21 
## - Fold5: k=21 
## + Fold5: k=23 
## - Fold5: k=23 
## Aggregating results
## Selecting tuning parameters
## Fitting k = 7 on full training set
# view how the tree performs
model3$results
##     k     RMSE  Rsquared      MAE   RMSESD RsquaredSD    MAESD
## 1   5 268334.7 0.7419127 122908.1 35002.73 0.04531215 6972.112
## 2   7 264300.5 0.7513824 121081.2 34503.87 0.04418249 6622.188
## 3   9 269796.0 0.7429650 121359.7 31435.99 0.03421529 6380.043
## 4  11 271217.9 0.7405854 121333.5 27911.04 0.02584640 6058.587
## 5  13 271880.1 0.7413560 121822.1 26284.24 0.02129176 6006.266
## 6  15 273521.4 0.7394451 122017.0 26610.99 0.02218062 5647.180
## 7  17 276592.0 0.7346948 122582.0 27617.31 0.02040363 5854.606
## 8  19 278407.9 0.7327221 122754.8 28946.91 0.02280756 6046.439
## 9  21 280663.9 0.7301074 123176.6 30938.59 0.02628859 5946.114
## 10 23 281850.0 0.7293300 123681.0 30807.98 0.02549628 5944.449
# Collect the cross-validated performance for every candidate k.
knn_performance <- data.frame(
  k        = model3$results$k,
  RMSE     = model3$results$RMSE,
  Rsquared = model3$results$Rsquared
)

# Highlight the k that caret actually selected. Reading it from
# model3$bestTune (instead of hard-coding k == 7) keeps this chunk
# correct if the tuning grid or the training data change.
highlight <- knn_performance %>%
  filter(k == model3$bestTune$k)

# Cross-validated RMSE across the tuning grid; the selected k is marked in red.
ggplot(knn_performance, aes(x = k, y = RMSE, group = 1)) +
  geom_line() +
  geom_point() +
  geom_point(
    data = highlight,
    aes(x = k, y = RMSE),
    color = "red",
    size = 3
  ) +
  theme_minimal() +
  labs(
    title = "Model achieved lowest RMSE at K=7",
    subtitle = "RMSE with different K values",
    x = "K",
    y = "RMSE"
  )

# Rsquared for different K
# Cross-validated R-squared across the tuning grid; the selected k is marked in red.
ggplot(knn_performance, aes(x = k, y = Rsquared, group = 1)) +
  geom_line() +
  geom_point() +
  geom_point(
    data = highlight,
    aes(x = k, y = Rsquared),
    color = "red",
    size = 3
  ) +
  theme_minimal() +
  labs(
    title = "Model achieved highest Rsquared at K=7",
    subtitle = "Rsquared with different K values",
    x = "K",
    y = "Rsquared"
  )

We can see that the model's cross-validated performance is best at k = 7, which achieves both the lowest RMSE and the highest R-squared.

# Refit the KNN model at the optimal k. Two fixes over the original chunk:
#  - seed the RNG so the CV folds are reproducible, consistent with the
#    other modelling chunks in this document;
#  - take k from model3$bestTune rather than hard-coding 7, so this chunk
#    stays correct if the tuning results change.
set.seed(1)
model3_knn <- train(
  price ~ latitude + longitude + london_zone + total_floor_area +
    crime_number + school_number + distance_to_station +
    co2_emissions_current + property_type,
  data = train_data,
  method = "knn",
  trControl = control,
  tuneGrid = model3$bestTune # optimal k selected during tuning (k = 7 here)
)
## + Fold1: k=7 
## - Fold1: k=7 
## + Fold2: k=7 
## - Fold2: k=7 
## + Fold3: k=7 
## - Fold3: k=7 
## + Fold4: k=7 
## - Fold4: k=7 
## + Fold5: k=7 
## - Fold5: k=7 
## Aggregating results
## Fitting final model on full training set
# You can also visualize the variable importance of the fitted KNN model.
# With scale=TRUE, caret rescales the importance scores to a 0-100 range
# so predictors are directly comparable.
importance <- varImp(model3_knn, scale=TRUE)
plot(importance)

# Evaluate the tuned KNN model on the held-out test set.
predictions <- predict(model3_knn, test_data)

# Summarise out-of-sample accuracy: root-mean-squared error and R-squared
# of the predicted prices against the actual transaction prices.
knn_results <- data.frame(
  RMSE    = RMSE(predictions, test_data$price),
  Rsquare = R2(predictions, test_data$price)
)

knn_results
##       RMSE   Rsquare
## 1 244964.8 0.7634802
# We can predict prices for the out-of-sample data (which only has asking
# prices) the same way, reusing the KNN model fitted on the training data.
predictions_oos <- predict(model3_knn,london_house_prices_2019_out_of_sample)
predictions_oos
##    [1]  896682.3  332135.7  441000.0  470000.0  986642.9  525550.0  873857.1
##    [8]  399785.7  735428.6  556213.6  399000.0 1000895.3  463000.0  809714.3
##   [15]  411857.1  486714.3  331714.3  284428.6  989428.6 1073571.4  523617.9
##   [22]  279642.9  267135.7 1295714.3  797071.4  586357.1  263214.3  273857.1
##   [29]  782500.0  447142.9  412714.3  464028.6  750857.1  819642.9  629142.9
##   [36]  424214.3  844971.4  255857.1  595992.9  655714.3  341428.6  986609.6
##   [43]  410571.4  634642.9  424107.1  451422.4  342857.1  567000.0  540642.9
##   [50]  305428.6  459285.7  373428.6  429571.4  364035.7  490642.9  646928.6
##   [57]  481714.3  360098.4  529857.1  300421.4 3503571.3  237928.6  351571.4
##   [64]  686600.0  191071.4  246285.7  763214.3  947857.1  787285.7  345971.1
##   [71]  603928.6  561857.1  362071.4  413357.1  250964.3  337850.0  384642.9
##   [78]  342928.6  841857.1  356714.3  295285.7  527000.0  421690.6  582285.7
##   [85] 1555714.3 2395714.3  293714.3  253685.7  823785.7  426714.3  503957.1
##   [92] 1166071.4  413735.4 1556642.9  401814.3  266642.9  354293.3  336857.1
##   [99]  318642.9  573178.6  224785.7  370071.4  676071.4  517571.1  290142.9
##  [106]  348564.3  243785.7  485500.0  411142.9  441071.4  608571.4  393464.3
##  [113]  444071.4  410500.0  370428.6 1628428.6  401571.4  695714.3  217142.9
##  [120]  554000.0  705714.3  365785.7  402857.1  702142.9  849514.3  525357.1
##  [127]  561857.1  512857.1  451428.6  445785.7  614000.0  454714.3  700214.3
##  [134]  423571.4  345421.4 1332500.0  286142.9  403000.0  464642.9  743571.4
##  [141]  353285.7  799285.7  225341.4  642427.9  510285.7  891000.0  226202.9
##  [148]  301314.3  328000.0  411142.9  567298.6  366714.3  228357.1  330714.1
##  [155]  475000.0  248571.4  401428.6  776785.7  431614.3  956714.3  341285.7
##  [162]  211428.6  295500.0  360714.3  541857.1 1361428.6 1001428.6  682428.6
##  [169]  608425.7  522248.6  423571.4 1775357.1  913357.1  242642.9  378492.9
##  [176] 1300285.7  291857.1  798571.4  630000.0  429000.0  396571.4  369928.6
##  [183] 1806071.4  458071.4  683142.9  728000.0 1335142.9  506707.1  266142.1
##  [190]  463000.0  474428.6  504835.7  312678.3  344285.7  455714.3  231714.3
##  [197]  324214.3  673000.0  457857.1  300785.7  526357.1  751785.7  568907.1
##  [204]  249285.7  423571.4  323207.1  260142.9  627333.9  415928.6  373714.3
##  [211]  357785.7  354428.6  734714.3  488642.9  875214.3  350760.6  443285.7
##  [218]  452857.1  300357.1  505857.1  232857.1  310071.4  454371.4  666017.9
##  [225]  246642.9  310928.6  677642.9  360285.7  223642.9 1067071.4  446928.6
##  [232]  550857.1  380064.3  347214.3 1497857.1 1251847.4  469785.7  262071.4
##  [239]  570500.0  281500.0  327571.4  311321.4  355785.7 1812857.1  681357.0
##  [246]  413928.6  414714.3  448642.9 6319553.6  923857.1  431785.7 1670714.3
##  [253]  529285.7  313285.7  575142.9  535621.4  465714.3 2200168.3  315928.6
##  [260]  780357.1 1072500.0  591285.7  560571.4  595285.7  713857.0  604428.6
##  [267]  369428.6  351221.9 1348571.4  496142.9 1008214.3  406428.6  297000.0
##  [274]  430142.9  401785.6 1567142.9  394857.1  402857.1 2252142.9  371714.1
##  [281] 1760607.1  435857.1  565714.3  362428.6  805142.9  384642.9  901428.6
##  [288]  417214.3  410947.7 1232857.1  349500.0  372928.6 1876785.7  758571.4
##  [295]  911040.9  506428.6  352357.1  646428.6  361714.3  262285.7 3885000.0
##  [302]  521428.6  225428.6  552000.0 1130714.3  334357.1  438714.3  349214.3
##  [309]  301721.9  808571.4  649428.6  409571.4  424071.4  505500.0  784714.3
##  [316] 1018357.1  453571.4 1300428.6  504642.9  393850.0 1109285.7  505357.1
##  [323]  261607.1  616400.0  352142.9 1110571.4  286371.4  279785.7  545642.9
##  [330]  687142.9  298000.0  394071.4  314778.6  761421.4  264642.9  495671.4
##  [337]  371428.6  637142.9  299642.9  404285.7  361357.1  246350.0  486642.9
##  [344] 1235500.0  663142.9 1154285.7  407042.9  549071.4  228071.4  338000.0
##  [351]  484571.4  502321.4  508000.0  392785.7  494000.0  428571.4  475571.4
##  [358]  530357.1  323500.0  485071.4  524285.7  395285.7  211214.3  401427.9
##  [365]  590928.6  477231.4  487142.9  576178.6  447571.4  447214.3  684021.4
##  [372]  722561.4  659071.4  360421.4  577321.4  341214.3  395214.3  514857.1
##  [379]  414571.4  436028.6  457857.1  324500.0  318714.3 1211857.1  353384.1
##  [386]  387142.9  526857.1  251214.3  785000.0  505928.6  292928.6  513142.9
##  [393]  372928.6  911642.9  319071.4  480721.4  428232.6  585714.3  289000.0
##  [400]  307778.6  835568.9  828714.3  462600.1  997928.6  844714.3  504857.1
##  [407]  400000.0  363500.0  448928.6  489142.9  603928.6  564650.0  559714.3
##  [414]  427214.3  312903.4  359357.1  451071.4  455742.9  551142.9  519571.4
##  [421] 1063214.3  463914.3  320000.0  400642.9  524000.0  469285.7  627571.4
##  [428]  215971.4  685285.7  393928.6  611357.1  738142.9  224528.6 1812857.1
##  [435]  528857.1  338285.7  613785.7  822142.9  527857.1  263571.4  676278.6
##  [442]  438598.4  810000.0  564000.0  296285.7  401000.0  653492.9  855785.7
##  [449]  720000.0  489364.3  655428.6  596312.9  415760.9 2131428.6  448928.6
##  [456]  305857.1 1460625.0  451571.4  335117.7 1549642.9 1418142.9  214142.9
##  [463]  230714.3  230000.0  878317.7  529214.3  468584.9  435642.9  892857.1
##  [470]  646857.1  327000.0  410714.3 3947142.9  439428.6  311857.1  243000.0
##  [477]  587357.1  455000.0  551492.9  739214.3  336321.4 1040000.0  577142.9
##  [484]  388571.4  395071.4  457214.3  813214.3  203757.1  407527.0 1754642.9
##  [491] 1663571.4  226000.0  203457.1  850814.3  429142.9  664642.9  499999.9
##  [498]  417785.7 1302857.1  705350.0  499642.7  392135.7 1155125.0  491142.9
##  [505]  322500.0  364035.7  556142.9  457857.1  229557.1  583571.4  357678.6
##  [512]  588583.1 1389219.9  544642.9  859285.7  391285.7  543571.3  288916.0
##  [519]  515142.9  729857.1  202785.7  471500.0  380064.3  431964.3  574142.9
##  [526]  317571.4 4218571.4  227142.9  685357.1  499285.7  503285.7  335714.3
##  [533]  712428.6  446285.7  628500.0  716997.4  315757.1 1215642.9  299107.1
##  [540]  403571.4  499714.3  618589.3 1141500.0  463214.3  593448.6  370642.9
##  [547]  815428.6  389642.9  642642.9 1422000.0  375528.6 1896714.3  455000.0
##  [554] 1036421.4  497071.4 1280714.3  364708.3  499428.6  469071.4  217142.9
##  [561]  433535.7  357785.7 1169714.3  549428.6  269357.1 1067857.0  642785.7
##  [568]  440214.3  267350.0  628571.4  464142.9  478571.4  454285.7 1430000.0
##  [575]  403071.4  308135.7  363642.9  926928.6  503000.0  358357.1  322428.6
##  [582]  409571.4  658792.1  299428.6 1676850.0  194000.0  853571.4  370928.6
##  [589]  283857.1 1127450.0  356464.3  252357.1  974214.3  203357.1  746071.4
##  [596] 1234285.7  606971.4  592500.0  463914.3  305142.9  350000.0  483000.0
##  [603]  541714.3  267642.9  850785.7 1329857.1  417057.1  778571.4  523857.1
##  [610]  619785.7  480714.3  339785.7  449142.9  402857.1  918714.3  280714.3
##  [617]  631857.1  468571.4  349528.6  469241.1  744428.6  379571.4  407807.1
##  [624]  424285.7  683142.9  242500.0  215270.0  397857.1  412714.3  635285.7
##  [631]  453178.6  394828.6  549571.4 1889285.7  598000.0  326285.7  222357.1
##  [638]  291308.9  685571.4  627642.9  340571.4  268785.7  279928.6  574285.7
##  [645]  961642.9  326300.0  406421.4  469278.6 1019428.6 1353214.3  685498.6
##  [652]  403142.9  372928.6  477642.7  381428.6  286071.4 1019071.4  669000.0
##  [659]  379142.9  395142.9  465000.0 2014985.7  235035.7  292928.6  962499.9
##  [666]  445264.3  455428.6 3284985.7  579857.1  319428.6  834500.0  634107.1
##  [673] 1118214.3  687428.6  419500.0  455400.0 1609285.7  734642.9  241535.7
##  [680]  701142.9  417642.9  380785.7  499999.9  528428.6  594306.4  391500.0
##  [687]  529500.0  385642.9  445214.3  543571.4  512500.0  404350.0  699428.6
##  [694]  492755.0  361785.7  261357.1  579642.9  360742.9 2546071.4 1987142.9
##  [701]  370428.6  334642.9  452427.7  327142.9  565285.7  487857.1  304278.6
##  [708]  208428.6  553500.0  540428.6  265214.3  526857.1  461285.7  408428.6
##  [715]  398214.3  216428.6  959928.6  538857.1  406857.1  426642.9  403157.1
##  [722]  566428.6  573928.6  403785.7  495071.4  543285.7  311071.4  520000.0
##  [729]  573785.7  932371.4  588428.6  375785.7  489600.0  414000.0  539648.7
##  [736] 2373428.6  625000.0  603142.9  536000.0  430857.1  267257.1  516428.6
##  [743]  309028.6  331928.6  225000.0  437785.7  256500.0  319928.6 1102928.6
##  [750] 1224571.4 1934285.7  509928.6  416428.6  518714.3  577428.6  399857.1
##  [757]  408642.9  560000.0  369357.1  380428.6  841857.1  426571.4  342000.0
##  [764]  346071.4  514142.9 1006857.1  553571.4  598442.9  520714.3  659714.3
##  [771]  490000.0  633285.7  371642.9 1088571.4  498571.4  483714.3 2252142.9
##  [778]  429571.4  327829.3  589142.9  386428.6  457821.1  578857.1  364928.6
##  [785] 2775642.9  566785.7  503142.9  486142.9  510828.6  469785.7  266571.4
##  [792]  962142.9  731571.4 1026500.0  354642.9  305928.6  237485.7  685492.9
##  [799]  717000.0  399857.1 1165000.0 1228071.4  380357.1  344885.7  580928.6
##  [806]  461428.6 1195714.3  458214.3 1727142.9  432785.7  462142.9  237857.1
##  [813]  760357.1  421714.3  343285.7  549142.9  609857.1  658142.9  457642.9
##  [820]  251428.6  728857.0 2479285.7  235071.4 1463000.0  809642.9  533339.6
##  [827]  362500.0  518500.0  563528.6  519357.1  327428.6  469285.7  467535.4
##  [834]  297285.7  418571.4  607285.7  207500.0  578571.4  445714.3  481571.4
##  [841]  438571.4  235857.1 1187857.1  361928.6  944828.6  424000.0  643782.9
##  [848]  460999.7  950785.7  924285.7 1367706.0  505357.1  384642.9  524999.7
##  [855]  411428.6  398000.0  525414.3  268999.3  589571.4  314142.9  619992.9
##  [862]  551873.6 2175785.7  435378.6  547428.6  393535.7  590714.3 1077171.4
##  [869]  365214.3  552857.1  741857.1  425278.4  231928.6  490285.7  374741.3
##  [876]  451857.1  381964.3  585714.3 1240471.4  791785.7  477421.4 1065893.0
##  [883]  241785.7  234214.3  343428.6  491785.7  461142.9  765285.7  368071.4
##  [890]  237500.0  590421.4  413464.3  690857.1  472571.4  705385.7  356214.3
##  [897]  326000.0  482421.4  498135.7  523428.6  451514.3  320607.1  928714.3
##  [904]  311028.6  563714.3  611142.9  673714.3  248500.0  372142.9  213000.0
##  [911]  580857.1  286428.6  368321.4  304642.9  423821.4  309300.0  816664.3
##  [918]  328428.6  691857.1  381071.4  404142.9  614127.1  375785.7  352500.0
##  [925]  279000.0  601806.4  510468.6  696071.4  406928.6  493428.6  232474.3
##  [932]  324927.9  544928.6  363571.4  240992.9  223714.3  333100.0 1390428.6
##  [939]  256071.4  806428.6  533321.4  498428.6  328428.6  481000.0  491084.9
##  [946]  377142.9  345928.6  406635.7  520428.6  677714.3  221142.9  340035.7
##  [953]  853506.4  571714.3  366785.7  352571.4  516000.0  404000.0  497928.6
##  [960]  261988.6  546071.4  392142.9 1095357.9  377357.1  718102.9  567298.6
##  [967]  384857.1 1620714.3 1839285.7 1300421.4  856071.4  623571.4  339357.1
##  [974]  378571.4  392000.0  474642.7  490857.1  744500.0  346857.1  655214.3
##  [981]  476428.6  401642.9  307357.1  610155.7 1466428.6  501071.4  520421.4
##  [988]  770714.3  343857.1  255342.9  783187.5  450000.0  454000.0 1798571.4
##  [995]  504500.0  353264.3  373714.3  304285.7  447514.3  424857.1  438785.7
## [1002]  514500.0  665000.0  540714.3  415285.7  344928.6  318428.6  344135.7
## [1009]  230571.4  543214.3  443357.1  343857.1  908142.9  407500.0  403785.7
## [1016]  544278.6  418064.3  322142.9  426500.0  298564.3  209627.1  408928.6
## [1023]  380928.6  372571.3  554428.6  542064.3 1969739.7  403571.4  336285.7
## [1030]  825912.9  487511.4  819571.4  330571.4  292142.9  475828.6  658285.7
## [1037]  498357.1  246000.0  580785.7  441000.0  259714.3  614142.9  473285.7
## [1044]  257285.7  452000.0  412135.7  216571.4  258500.0  395285.7  360714.3
## [1051]  801350.0  391214.3  379707.1  632642.1  521714.3 1752142.9  377285.7
## [1058]  477428.6 2995000.0  526714.3 1378214.3  688714.3  474285.7  389214.3
## [1065]  784242.9  732071.4  360142.9 1235357.1  241785.7  444578.6 1763785.7
## [1072]  345571.4  359285.7  417619.1  399071.4  931928.6  427428.6  604421.4
## [1079]  276785.7  331714.3  377714.3  430564.3  456357.1  849785.7  364714.3
## [1086]  676785.7  374928.6  853142.9  883857.1  317642.9  721428.6  415028.6
## [1093]  259628.6  386571.4  871428.6  557857.1  389271.4  383928.6  308000.0
## [1100]  196857.1  408785.7 1222178.6  650512.9  604428.6  342214.3  543857.1
## [1107]  482500.0 1941714.3  445371.4  493678.4  324000.0 3922142.9  795950.0
## [1114]  420714.3  347357.1  540142.9  666071.4  513142.9  327600.0  362500.0
## [1121]  393321.4  364142.9  206714.3  651071.4  528285.7  407500.0  413928.6
## [1128]  626714.3  495714.3  380857.1  401135.7  426000.0  220000.0  314714.3
## [1135] 1176471.4  741285.7  517714.3  596142.9  649357.1  327214.3  362285.7
## [1142]  442571.1  666304.9  200500.0  413928.6  953857.1  552714.3  498142.9
## [1149]  196607.1  199714.3  654428.6 1140428.6  337000.0  480785.7  275071.4
## [1156] 2907142.9  400000.0  503928.6  370785.7  298057.1  764285.7  775571.4
## [1163]  606157.1  589000.0  248642.9  504857.1  385900.0  590000.0  539750.0
## [1170]  603214.3  598214.3  325714.3  474600.0  576428.4  686282.9  583428.6
## [1177]  373900.0  994285.7  498071.4  612000.0  543857.1  419642.9  512000.0
## [1184]  548571.4  540642.9  363721.9 1471142.9  333144.1  297642.9  269428.6
## [1191]  515857.1  231857.1  417857.1 1333014.3  392428.6 1497857.1  542142.9
## [1198]  468928.6  472928.6  438857.1  546714.3 1367300.0  460928.6  336428.6
## [1205]  524714.3  209285.7  561142.9  540671.4  587071.4  408928.6  224642.9
## [1212]  768714.3  340500.0  593571.4  224928.6  412285.6  492285.7  635805.7
## [1219]  478928.6  589142.9  543285.7  644285.7  270428.6  448778.6  454500.0
## [1226]  232571.4  845357.1  737500.0  534714.3  687428.6  489333.6  378142.9
## [1233]  406785.7  212571.4  346107.1 1067285.7  381285.7  547142.9  670500.0
## [1240]  447850.0 1176850.0  692064.3  542071.4  488771.4  698714.3  513857.1
## [1247]  327642.7  494285.7  715671.4  306142.9  668118.6  333642.9 1057000.0
## [1254]  294928.6  322664.3  971142.9  408071.4 1284143.6  336785.7  758071.4
## [1261]  237785.7  494071.4  608100.0  484999.7  347071.4  398285.7  441807.1
## [1268]  534142.9 1142489.4  250000.0  655000.0  345142.9  363000.0  587714.3
## [1275]  463500.0 3707142.9  353142.9  757114.3  440500.0  504428.6  375000.0
## [1282]  421142.9  240625.0  548442.0  430714.3 1222178.6  477214.3  638125.7
## [1289] 1076428.6  361857.1  466157.1  621642.9  525785.7 1174172.9  699746.3
## [1296]  522421.3  448999.9  565000.0  540142.9 2555714.3  429357.1  341428.6
## [1303]  474571.4  394428.6  930257.1  515142.9  470000.0  296000.0 1136500.0
## [1310] 5784742.9  360000.0  400942.9  418785.7 1183685.7  427178.6  539642.7
## [1317] 1336428.6  616071.4  359357.1  555142.9  378500.0  398357.1  229198.6
## [1324]  677500.0  875314.3  753000.0  475528.7  580642.9  733714.3  606571.4
## [1331]  273285.7  417142.9  394500.0  870000.0  257714.3  390928.6  355142.9
## [1338]  449071.4 1345785.7  573285.7  499660.7 2382857.1  312964.0  862571.4
## [1345]  636071.4  512500.0  491085.7  486542.9  518000.0  618285.7  830321.4
## [1352]  276571.4  663285.7  371714.3  510341.4  899285.7  418928.6  390714.3
## [1359] 2555000.0  423428.6 1118357.1  571000.0  421285.7  348928.6  374714.3
## [1366]  349500.0  394285.7  255285.7  413250.0  902857.1  419785.7  932857.1
## [1373]  263571.4  239357.1  700000.0  662768.6  376857.1  398857.1  513714.3
## [1380]  529142.9  273714.3 2080142.9  278178.6  385128.6  263232.4  577785.7
## [1387]  871714.3  378928.6  462714.3 1113357.1 1902357.1  432000.0  239000.0
## [1394]  435357.1  379714.3  432571.4  372785.7  556916.0  459785.7  927357.1
## [1401]  360821.4  425642.9  401342.9  475357.1  447514.3  529733.4  224857.1
## [1408]  628492.9  490642.9  946071.4  343642.9  271142.9  292642.9  374857.1
## [1415] 1021857.1  434142.9  373214.3  758928.6 2189285.7  343928.6  436714.3
## [1422]  330400.7  331829.3  431107.1  611785.7  396214.3  727114.3  332142.9
## [1429]  312500.0  411241.3 1340128.6  344514.3  659571.4  361071.4  398714.3
## [1436]  451428.6  286142.9  440571.4  390160.7 1024857.1  483857.1  292571.4
## [1443] 1844642.9  252142.9  430714.3  908428.6  624928.6  326300.0  356785.7
## [1450] 1243214.3  393142.9  352357.1  399285.7  519525.6  428157.1  460285.7
## [1457]  324214.3  442142.9 1154000.7  391000.0  485857.1  370785.7  390592.9
## [1464]  674000.0  570000.0  481785.7  793928.6  392135.7 1312857.1  366071.4
## [1471] 1395857.1  615000.0 2297857.1  545907.1  495857.1 1217928.6  225000.0
## [1478]  414500.0  498357.1  265285.7  348928.6  261214.3  366714.3  395107.1
## [1485]  894642.9 1209285.7  217285.7  451571.4  354285.7  314142.9  772857.1
## [1492]  240821.4  614000.0  368285.7  654714.3  447214.3  538142.9  315142.9
## [1499]  500928.4  222428.6  249642.9  383000.0 1164214.3  512142.7 1339785.7
## [1506]  305357.1  442278.6  549285.7  304714.3  522500.0  573564.3  282285.7
## [1513] 1178857.1  597357.1  520000.0  404285.6  365357.1  392571.4  380714.3
## [1520]  341357.1  188642.9  956428.6  695142.9  525857.1  413928.6  721500.0
## [1527]  456571.4  345642.9  416357.1 1125500.0  795285.7  240857.1  435285.7
## [1534] 1491428.6  612285.7  751357.1  448228.4  920142.9  602357.1  408357.1
## [1541]  268635.7  309428.6  561857.1  493642.9  435714.3  325857.1  430678.6
## [1548]  364157.1  450785.7 1005285.7  437928.6  434928.6  463214.3  202614.3
## [1555]  377142.9  567500.0  539000.0  504857.1  414000.0 2629214.3 1271942.9
## [1562]  473821.3 2935000.0  304285.7  463928.6  384071.4  370071.4  454285.7
## [1569]  376000.0  484748.4  315571.4  290571.4  501421.4  415000.0  380500.0
## [1576]  248000.0  876285.7  455071.4  375035.7  335785.7  686071.4  526142.9
## [1583] 1176862.7  605714.3  492714.3  341428.6  464500.0  729071.4  586714.3
## [1590] 3831785.7  252713.6 1743678.6  425192.1  438071.4  599857.1  593357.1
## [1597]  235142.9  396057.1  517785.7  481000.0  266028.6  633500.0  824442.9
## [1604] 1193214.3  490878.6  497435.7  397642.9  604245.0  579000.0  350850.0
## [1611]  627571.4 1089542.0 1196385.7  321250.0  476714.3 2290142.9  391214.3
## [1618] 1015571.4  276071.4  306571.4 1035714.3  467571.4  943463.6  418785.7
## [1625]  409642.9  536142.9  383272.9  663357.1  508571.4  384000.0  493792.9
## [1632]  431428.6  750857.1  494214.3  481714.3  441000.0  525000.0  503571.4
## [1639]  435207.1  344642.9  547142.9  548571.4  295635.7  895000.0  363293.3
## [1646]  417428.6  459928.6  418000.0  700428.6  290178.6  562428.6  477849.3
## [1653]  500000.0  574642.7  288812.5  323171.4  251028.6  468571.4  438928.6
## [1660]  412428.6  350428.6  400428.6  450285.7  560714.3  364857.1  516142.9
## [1667]  428500.0  252142.9  229071.4 1118000.0  291428.6  611428.6  396571.4
## [1674]  715857.1  540071.4 1472142.9  401785.7  602850.0 1197857.1  603447.7
## [1681]  334142.9  307500.0  547070.7  308000.0  725000.0 1369428.6  914785.7
## [1688]  614285.7  527142.9  610357.1  218928.6  359571.4  564571.4  246571.4
## [1695]  492714.3  280642.9 2236428.6  407571.4  650000.0  540785.7 1425000.0
## [1702]  632285.7  434571.4 4334999.9  440071.4  354214.3  328428.6  538214.3
## [1709]  689971.4  329785.7  479285.7  945357.1  411262.1  185392.9  215571.4
## [1716] 1239285.6  358250.0  492778.6  210471.4  381357.1  602142.9 1028857.1
## [1723]  388185.7  215857.1  603714.3  220642.9  386000.0  360135.7 2284285.6
## [1730]  566857.1  418528.6  638364.3  524357.1  557928.6  296428.6  536428.6
## [1737]  282785.7  383750.0  455142.9  427778.6  531142.9  491857.1  389714.3
## [1744]  482857.1  738000.0  394207.1  407042.9  832857.1  296642.9 1205000.0
## [1751]  667428.6  432142.9  364785.7  398142.9  259642.9  356857.1  564000.0
## [1758]  452285.7  432000.0  654959.9  267571.4  310285.7  516071.4  259142.9
## [1765]  436357.1  568356.4  400571.4  477085.7  298714.3  518057.1  378500.0
## [1772]  525571.4  233428.6  526892.9 1247857.1  499278.6  304142.9  367714.3
## [1779]  314922.9  628642.9  465785.7  586821.4  253357.1  403428.6  352000.0
## [1786] 1349285.7  574142.9  488571.4 1467142.9  398142.9  489571.4  351071.4
## [1793]  644571.4  813602.9  440071.4 1350285.7  396642.9  325000.0  445142.9
## [1800]  528285.7  347071.4  393285.7  892307.7  358142.9  448000.0  425571.4
## [1807]  526142.9  470000.0  513857.1  353285.7  272714.3  297500.0  521207.1
## [1814]  602857.1  300785.7  445428.6  345357.1  648042.9  359500.0  332142.9
## [1821]  234071.4  497571.4  343114.3  562857.1  481142.9  742857.1  429571.4
## [1828]  530571.4  425249.9  473214.3  395714.3  476357.1  723214.3 2305699.9
## [1835]  734479.1  524814.3  365357.1  559857.1  255357.1  796428.6  401857.1
## [1842]  407857.1  312657.1  363714.3 1050714.3  402500.0  473500.0 1539285.7
## [1849] 4110714.3 1451857.1  681571.4  899714.3  444300.0  931082.1  259285.7
## [1856]  750357.1 3473071.4  606500.0  446142.9  725571.4  337357.1  600714.3
## [1863]  368428.6  437714.3  599285.7  450107.1  537857.1  505000.0  382928.6
## [1870]  493064.3  848642.9  504857.1  815714.3  343000.0  295285.7  215714.3
## [1877] 1743571.4  654285.7  334142.9  675568.6  503142.9  946428.6  330857.1
## [1884]  643278.6  504142.9  893870.6  614642.9  384500.0  287392.9  283000.0
## [1891]  606428.6  554928.6  375857.1  405785.7  327214.3  346000.0 1362142.9
## [1898]  279125.3  288357.1  358214.3  439000.0  439478.6  697857.1  714071.4
## [1905]  346642.9  373214.3  329428.6  429428.6  615714.3  429285.7  302892.9
## [1912]  324142.9  319857.1  440785.7  345000.0  763571.4  472142.9  395928.6
## [1919]  410714.3  440642.9  812428.6 1092264.3  810278.6 1075357.1  585099.7
## [1926]  398571.4  287285.7  578578.6  313000.0  539357.1  388071.4  446357.1
## [1933]  432161.1  445357.1  335428.6 1216428.6  456571.4  409071.4  468071.4
## [1940]  377642.9  492857.1  370812.9  341428.6  215000.0  452500.0  444285.7
## [1947]  454714.3  583928.6  412992.9  647857.1  406778.4  415857.1  830714.3
## [1954]  641428.6  596257.1  447900.0  321142.9  708571.4  611928.6  515571.4
## [1961]  280992.9 2264271.4  390642.9  507285.7 1712857.1  468642.9  633500.0
## [1968]  428928.6  390142.9  274785.7  282857.1  308000.0  509142.9  450857.1
## [1975]  487214.3  222928.6  326500.0  832928.6  267214.3  475714.3  412214.3
## [1982]  318642.9 1234285.7  428477.9  697785.7  490285.7  239857.1  685964.3
## [1989]  658785.7  660250.0  608214.3  476802.9
set.seed(1)

# 1 neuron: fit a Bayesian-regularised neural network (brnn) via caret,
# using the same predictor set as the other candidate models.
model4_brnn1 <- train(
  price ~ total_floor_area + co2_emissions_current + average_income +
    latitude + longitude + num_tube_lines + num_rail_lines + population +
    energy_consumption_current + num_light_rail_lines + london_zone +
    crime_number + distance_to_station + property_type,
  data = train_data,
  method = "brnn",
  trControl = control,
  tuneGrid = expand.grid(neurons = 1)
)
## + Fold1: neurons=1 
## Number of parameters (weights and biases) to estimate: 18 
## Nguyen-Widrow method
## Scaling factor= 0.7 
## gamma= 17.9696    alpha= 2.4282   beta= 220.5269 
## - Fold1: neurons=1 
## + Fold2: neurons=1 
## Number of parameters (weights and biases) to estimate: 18 
## Nguyen-Widrow method
## Scaling factor= 0.7 
## gamma= 17.9637    alpha= 2.8012   beta= 208.7922 
## - Fold2: neurons=1 
## + Fold3: neurons=1 
## Number of parameters (weights and biases) to estimate: 18 
## Nguyen-Widrow method
## Scaling factor= 0.7 
## gamma= 17.9686    alpha= 2.4911   beta= 223.9209 
## - Fold3: neurons=1 
## + Fold4: neurons=1 
## Number of parameters (weights and biases) to estimate: 18 
## Nguyen-Widrow method
## Scaling factor= 0.7 
## gamma= 17.9717    alpha= 2.381    beta= 234.7429 
## - Fold4: neurons=1 
## + Fold5: neurons=1 
## Number of parameters (weights and biases) to estimate: 18 
## Nguyen-Widrow method
## Scaling factor= 0.7 
## gamma= 17.9711    alpha= 2.5081   beta= 234.9489 
## - Fold5: neurons=1 
## Aggregating results
## Fitting final model on full training set
## Number of parameters (weights and biases) to estimate: 18 
## Nguyen-Widrow method
## Scaling factor= 0.7 
## gamma= 17.9733    alpha= 2.4954   beta= 225.8695
# Hold-out performance of the 1-neuron model: RMSE and R-squared of its
# predictions against the actual sale prices in the test split.
predictions <- predict(model4_brnn1, test_data)
brnn_results1 <- data.frame(
  RMSE    = RMSE(predictions, test_data$price),
  Rsquare = R2(predictions, test_data$price)
)

# 2 neurons
# Re-seed before train() so this model is cross-validated on identical
# folds to the 1-neuron fit; without it each candidate is tuned on
# different resamples and the neuron-count comparison is not like-for-like.
set.seed(1)
model4_brnn2 <- train(
  price ~ total_floor_area + co2_emissions_current + average_income +
    latitude + longitude + num_tube_lines + num_rail_lines + population +
    energy_consumption_current + num_light_rail_lines + london_zone +
    crime_number + distance_to_station + property_type,
  data = train_data,
  method = "brnn",
  trControl = control,
  tuneGrid = expand.grid(neurons = 2)
)
## + Fold1: neurons=2 
## Number of parameters (weights and biases) to estimate: 36 
## Nguyen-Widrow method
## Scaling factor= 0.7000581 
## gamma= 35.7822    alpha= 1.0744   beta= 299.4348 
## - Fold1: neurons=2 
## + Fold2: neurons=2 
## Number of parameters (weights and biases) to estimate: 36 
## Nguyen-Widrow method
## Scaling factor= 0.7000581 
## gamma= 35.7793    alpha= 1.1629   beta= 291.9246 
## - Fold2: neurons=2 
## + Fold3: neurons=2 
## Number of parameters (weights and biases) to estimate: 36 
## Nguyen-Widrow method
## Scaling factor= 0.7000581 
## gamma= 35.7633    alpha= 1.0083   beta= 326.387 
## - Fold3: neurons=2 
## + Fold4: neurons=2 
## Number of parameters (weights and biases) to estimate: 36 
## Nguyen-Widrow method
## Scaling factor= 0.7000581 
## gamma= 35.8282    alpha= 0.9392   beta= 319.3186 
## - Fold4: neurons=2 
## + Fold5: neurons=2 
## Number of parameters (weights and biases) to estimate: 36 
## Nguyen-Widrow method
## Scaling factor= 0.7000581 
## gamma= 35.7844    alpha= 1.0645   beta= 308.6356 
## - Fold5: neurons=2 
## Aggregating results
## Fitting final model on full training set
## Number of parameters (weights and biases) to estimate: 36 
## Nguyen-Widrow method
## Scaling factor= 0.7000465 
## gamma= 35.819     alpha= 1.0518   beta= 309.1937
# Hold-out performance of the 2-neuron model.
predictions <- predict(model4_brnn2, test_data)
brnn_results2 <- data.frame(
  RMSE    = RMSE(predictions, test_data$price),
  Rsquare = R2(predictions, test_data$price)
)

# 3 neurons
# Re-seed before train() so this model is cross-validated on identical
# folds to the 1-neuron fit; without it each candidate is tuned on
# different resamples and the neuron-count comparison is not like-for-like.
set.seed(1)
model4_brnn3 <- train(
  price ~ total_floor_area + co2_emissions_current + average_income +
    latitude + longitude + num_tube_lines + num_rail_lines + population +
    energy_consumption_current + num_light_rail_lines + london_zone +
    crime_number + distance_to_station + property_type,
  data = train_data,
  method = "brnn",
  trControl = control,
  tuneGrid = expand.grid(neurons = 3)
)
## + Fold1: neurons=3 
## Number of parameters (weights and biases) to estimate: 54 
## Nguyen-Widrow method
## Scaling factor= 0.7000921 
## gamma= 52.8702    alpha= 0.9782   beta= 322.566 
## - Fold1: neurons=3 
## + Fold2: neurons=3 
## Number of parameters (weights and biases) to estimate: 54 
## Nguyen-Widrow method
## Scaling factor= 0.7000921 
## gamma= 53.2531    alpha= 1.0801   beta= 320.6918 
## - Fold2: neurons=3 
## + Fold3: neurons=3 
## Number of parameters (weights and biases) to estimate: 54 
## Nguyen-Widrow method
## Scaling factor= 0.7000921 
## gamma= 52.913     alpha= 0.8304   beta= 335.2411 
## - Fold3: neurons=3 
## + Fold4: neurons=3 
## Number of parameters (weights and biases) to estimate: 54 
## Nguyen-Widrow method
## Scaling factor= 0.7000921 
## gamma= 53.1251    alpha= 0.1129   beta= 385.9223 
## - Fold4: neurons=3 
## + Fold5: neurons=3 
## Number of parameters (weights and biases) to estimate: 54 
## Nguyen-Widrow method
## Scaling factor= 0.7000921 
## gamma= 53.2037    alpha= 1.1323   beta= 320.8945 
## - Fold5: neurons=3 
## Aggregating results
## Fitting final model on full training set
## Number of parameters (weights and biases) to estimate: 54 
## Nguyen-Widrow method
## Scaling factor= 0.7000737 
## gamma= 53.0461    alpha= 1.0833   beta= 327.5292
# Hold-out performance of the 3-neuron model.
predictions <- predict(model4_brnn3, test_data)
brnn_results3 <- data.frame(
  RMSE    = RMSE(predictions, test_data$price),
  Rsquare = R2(predictions, test_data$price)
)

# 4 neurons
# Re-seed before train() so this model is cross-validated on identical
# folds to the 1-neuron fit; without it each candidate is tuned on
# different resamples and the neuron-count comparison is not like-for-like.
set.seed(1)
model4_brnn4 <- train(
  price ~ total_floor_area + co2_emissions_current + average_income +
    latitude + longitude + num_tube_lines + num_rail_lines + population +
    energy_consumption_current + num_light_rail_lines + london_zone +
    crime_number + distance_to_station + property_type,
  data = train_data,
  method = "brnn",
  trControl = control,
  tuneGrid = expand.grid(neurons = 4)
)
## + Fold1: neurons=4 
## Number of parameters (weights and biases) to estimate: 72 
## Nguyen-Widrow method
## Scaling factor= 0.7001162 
## gamma= 69.8144    alpha= 1.0808   beta= 236.5593 
## - Fold1: neurons=4 
## + Fold2: neurons=4 
## Number of parameters (weights and biases) to estimate: 72 
## Nguyen-Widrow method
## Scaling factor= 0.7001162 
## gamma= 70.7664    alpha= 0.1171   beta= 378.9905 
## - Fold2: neurons=4 
## + Fold3: neurons=4 
## Number of parameters (weights and biases) to estimate: 72 
## Nguyen-Widrow method
## Scaling factor= 0.7001162 
## gamma= 70.6434    alpha= 0.2226   beta= 396.6563 
## - Fold3: neurons=4 
## + Fold4: neurons=4 
## Number of parameters (weights and biases) to estimate: 72 
## Nguyen-Widrow method
## Scaling factor= 0.7001162 
## gamma= 69.9449    alpha= 0.987    beta= 344.1757 
## - Fold4: neurons=4 
## + Fold5: neurons=4 
## Number of parameters (weights and biases) to estimate: 72 
## Nguyen-Widrow method
## Scaling factor= 0.7001162 
## gamma= 70.7362    alpha= 0.0987   beta= 430.2258 
## - Fold5: neurons=4 
## Aggregating results
## Fitting final model on full training set
## Number of parameters (weights and biases) to estimate: 72 
## Nguyen-Widrow method
## Scaling factor= 0.7000929 
## gamma= 70.971     alpha= 0.0944   beta= 390.3082
# Hold-out performance of the 4-neuron model.
predictions <- predict(model4_brnn4, test_data)
brnn_results4 <- data.frame(
  RMSE    = RMSE(predictions, test_data$price),
  Rsquare = R2(predictions, test_data$price)
)

# 5 neurons
# Re-seed before train() so this model is cross-validated on identical
# folds to the 1-neuron fit; without it each candidate is tuned on
# different resamples and the neuron-count comparison is not like-for-like.
set.seed(1)
model4_brnn5 <- train(
  price ~ total_floor_area + co2_emissions_current + average_income +
    latitude + longitude + num_tube_lines + num_rail_lines + population +
    energy_consumption_current + num_light_rail_lines + london_zone +
    crime_number + distance_to_station + property_type,
  data = train_data,
  method = "brnn",
  trControl = control,
  tuneGrid = expand.grid(neurons = 5)
)
## + Fold1: neurons=5 
## Number of parameters (weights and biases) to estimate: 90 
## Nguyen-Widrow method
## Scaling factor= 0.7001349 
## gamma= 86.4998    alpha= 0.1911   beta= 412.8044 
## - Fold1: neurons=5 
## + Fold2: neurons=5 
## Number of parameters (weights and biases) to estimate: 90 
## Nguyen-Widrow method
## Scaling factor= 0.7001349 
## gamma= 87.6152    alpha= 0.6934   beta= 388.7802 
## - Fold2: neurons=5 
## + Fold3: neurons=5 
## Number of parameters (weights and biases) to estimate: 90 
## Nguyen-Widrow method
## Scaling factor= 0.7001349 
## gamma= 87.6363    alpha= 0.2775   beta= 422.9289 
## - Fold3: neurons=5 
## + Fold4: neurons=5 
## Number of parameters (weights and biases) to estimate: 90 
## Nguyen-Widrow method
## Scaling factor= 0.7001349 
## gamma= 87.1703    alpha= 0.3695   beta= 371.1612 
## - Fold4: neurons=5 
## + Fold5: neurons=5 
## Number of parameters (weights and biases) to estimate: 90 
## Nguyen-Widrow method
## Scaling factor= 0.7001349 
## gamma= 85.9534    alpha= 0.0979   beta= 390.5327 
## - Fold5: neurons=5 
## Aggregating results
## Fitting final model on full training set
## Number of parameters (weights and biases) to estimate: 90 
## Nguyen-Widrow method
## Scaling factor= 0.7001079 
## gamma= 88.5332    alpha= 0.0754   beta= 418.5014
# Hold-out performance of the 5-neuron model.
predictions <- predict(model4_brnn5, test_data)
brnn_results5 <- data.frame(
  RMSE    = RMSE(predictions, test_data$price),
  Rsquare = R2(predictions, test_data$price)
)

# Collect the hold-out metrics of all five candidate models into one
# table, built in a single vectorized step instead of filling a blank
# data frame cell by cell.
all_brnn <- list(brnn_results1, brnn_results2, brnn_results3,
                 brnn_results4, brnn_results5)
brnn_results <- data.frame(
  number_neuron = seq_along(all_brnn),
  RMSE          = vapply(all_brnn, function(r) r$RMSE, numeric(1)),
  Rsquare       = vapply(all_brnn, function(r) r$Rsquare, numeric(1))
)

# Row to highlight on the plots: the neuron count with the lowest
# hold-out RMSE (3 neurons for this data), derived from the results
# rather than hard-coded, so it stays correct if the data changes.
highlight <- brnn_results %>%
  filter(RMSE == min(RMSE))

# Hold-out RMSE across neuron counts; the best model is marked in red.
ggplot(brnn_results, aes(x = number_neuron, y = RMSE, group = 1)) +
  geom_line() +
  geom_point() +
  geom_point(
    data = highlight,
    aes(x = number_neuron, y = RMSE),
    colour = "red",
    size = 3
  ) +
  theme_minimal() +
  labs(
    title = "Model achieved lowest RMSE at 3 neurons",
    subtitle = "RMSE with different number of neurons",
    x = "Number of neurons",
    y = "RMSE"
  )

# Hold-out R-squared across neuron counts; the best model is marked in red.
ggplot(brnn_results, aes(x = number_neuron, y = Rsquare, group = 1)) +
  geom_line() +
  geom_point() +
  geom_point(
    data = highlight,
    aes(x = number_neuron, y = Rsquare),
    colour = "red",
    size = 3
  ) +
  theme_minimal() +
  labs(
    title = "Model achieved highest Rsquared at 3 neurons",
    subtitle = "Rsquared with different number of neurons",
    x = "Number of neurons",
    y = "Rsquared"
  )

#you can also visualize the variable importance
# caret::varImp extracts per-predictor importance from the fitted 3-neuron
# model; scale=TRUE rescales the scores to 0-100 before plotting.
importance <- varImp(model4_brnn3, scale=TRUE)
plot(importance)

#We can predict prices for out of sample data the same way
# Score the out-of-sample listings with the chosen 3-neuron model and
# display the predicted prices.
predictions_oos <- predict(model4_brnn3, london_house_prices_2019_out_of_sample)
print(predictions_oos)
##    [1]  952270.3  317565.3  379754.7  445816.8 1005285.0  524024.1  819215.6
##    [8]  321010.1  702625.7  507153.0  425614.6 1002260.1  303196.9 1202945.2
##   [15]  352114.1  432980.8  321321.4  225412.4  754116.2 1089450.4  596919.2
##   [22]  275968.5  217888.2 1397539.2 1167261.9  541072.2  265534.2  321049.0
##   [29] 1210096.6  573645.7  500499.2  499085.4  540619.2 1014608.3  868909.8
##   [36]  370710.5  690717.0  344391.3  882482.2  639360.8  336943.4  914781.5
##   [43]  478668.8  822023.8  629943.3  401760.8  304906.5  595761.7  334035.5
##   [50]  302484.6  418529.8  418272.9  443374.1  404364.8  609106.7  849584.9
##   [57]  539830.2  461792.0  533922.9  294901.5 3404679.5  259395.0  314955.0
##   [64]  779162.7  252355.7  223006.8  535077.0  776845.5  670941.4  304316.9
##   [71]  711864.5  482876.3  359687.3  484432.9  295866.2  280870.2  307158.6
##   [78]  356653.3  801945.8  318105.5  323874.8  461373.5  527936.9  650988.2
##   [85] 1879058.3 2078459.2  284822.3  280582.8  782219.6  368687.5  408085.9
##   [92]  882870.1  312981.1 1463842.3  497554.2  292481.7  222214.1  357756.9
##   [99]  286068.1  543557.6  287795.7  310443.6  563525.9  440115.8  322964.5
##  [106]  315389.5  247131.5  440105.2  476829.0  436171.3  552951.8  420598.7
##  [113]  404758.9  336301.8  298231.0 1773569.0  399726.7  516154.9  268488.6
##  [120]  482851.4  613666.2  328902.7  387097.5  771815.0 1007453.7  552234.2
##  [127]  719626.9  478593.5  321304.9  393887.5  428806.6  373332.1  658647.7
##  [134]  434887.7  284153.9 1500211.9  299904.4  410600.2  516523.7  849933.4
##  [141]  417626.2  744587.6  264380.5  611003.3  443129.1 1173150.2  250814.4
##  [148]  326295.1  269039.3  454928.3  514317.2  391664.5  315701.4  275344.2
##  [155]  439330.5  254496.4  412188.9 1134279.0  376296.9  845754.8  336430.9
##  [162]  287407.2  252921.4  365540.5  527286.7 1122193.5 1326604.5  612172.6
##  [169]  553825.3  424635.8  373710.4 1766519.3  827421.3  175660.6  298401.2
##  [176] 1614615.8  203400.2  713285.1  552255.7  375343.7  426021.4  382412.8
##  [183] 1882128.8  435394.4  595035.8  603866.8 1631135.5  401555.0  277760.1
##  [190]  380033.8  405324.3  407678.8  347130.1  329514.5  456391.2  286586.5
##  [197]  235776.6  540270.9  571891.8  318567.8  459888.1  959824.3  505443.5
##  [204]  252187.1  377668.6  237081.7  428835.3  698539.1  278744.3  361902.8
##  [211]  322204.1  369320.7  755911.4  604214.7  770905.6  321553.0  356740.8
##  [218]  494555.4  253274.5  486349.0  267249.2  329064.5  423765.4  574578.9
##  [225]  212804.3  339401.6  636681.8  379836.5  256323.1 1020092.3  365911.9
##  [232]  496186.5  415002.7  476334.8 1486484.1 1258586.7  432523.3  298184.9
##  [239]  659289.4  274473.1  327739.4  442581.2  336721.7 1698300.5  896540.1
##  [246]  374303.8  335539.0  567588.1 6259260.8 1164988.4  351454.8 1589882.7
##  [253]  492241.9  388931.8  750886.6  428531.3  482304.7 2303966.4  308386.1
##  [260]  919270.2  858468.9  547877.7  535846.3  472561.4  821058.3  664036.8
##  [267]  269960.6  290729.5 1537340.0  582651.8 1073592.6  439631.0  234836.5
##  [274]  449529.2  329638.2 1131590.7  405592.2  397394.9 1468056.4  384372.8
##  [281]  514002.0  266638.6  601915.5  372185.7  845259.6  358013.0  904438.6
##  [288]  342073.5  615986.0 1163534.6  287088.8  321675.6 1925739.0  799151.8
##  [295]  705578.5  635224.5  309990.0  532646.3  350512.5  306725.7 4541320.2
##  [302]  588014.9  295121.5  624430.7 1253147.3  340365.2  457459.1  324934.0
##  [309]  332273.9  839949.4  681430.9  359189.5  445013.9  434230.8  925049.1
##  [316]  931172.5  420156.2 1776186.2  468755.0  463111.5  699019.2  437186.2
##  [323]  279314.0  547896.6  335555.3 1201315.1  262962.3  299768.3  536043.6
##  [330]  896959.8  327718.7  374045.1  338819.9  708735.5  261770.1  493909.9
##  [337]  328140.9  635489.3  286551.8  416422.0  351784.5  296342.5  483679.6
##  [344] 1424817.7  668624.8 1114881.4  362212.3  657829.3  296099.1  285477.3
##  [351]  474220.3  777359.3  348433.5  370987.5  388972.4  438373.7  398327.4
##  [358]  377871.5  296882.1  467638.2  503993.8  432993.7  246062.8  371228.9
##  [365]  547966.7  426732.8  566951.7  556247.8  451658.1  437250.0  841913.6
##  [372]  987163.2  629377.5  239904.7  697850.4  348564.6  586092.7  516455.2
##  [379]  414565.1  363875.7  411827.2  285635.2  287483.1 1489122.3  349558.3
##  [386]  380234.3  456724.9  340719.2  769045.8  548569.7  297558.1  483914.8
##  [393]  378903.0  848653.9  301322.9  341391.1  473291.8  681865.5  314141.9
##  [400]  333954.4  808312.2 1043901.3  573604.7 1038324.6 1009707.9  394448.0
##  [407]  268657.4  319986.8  502121.0  569077.0  584894.6  445618.3  713915.8
##  [414]  363945.5  294841.4  264914.6  362792.3  584304.2  474155.3  458702.3
##  [421]  869963.7  446189.9  324190.7  369913.6  516431.8  496971.3  466425.6
##  [428]  236690.6  532464.6  460785.1  438534.0 1192984.3  260240.8 1589838.9
##  [435]  523289.2  303205.0  575667.2  782758.9  454708.9  212174.6  746406.7
##  [442]  397676.7  856020.7  348089.3  346488.0  484265.1  973991.6  613861.4
##  [449]  646400.0  636517.0  655189.7  507249.4  475364.6 1512761.3  473063.5
##  [456]  230025.5 1386644.5  389254.1  341449.8 1345053.4 1581408.9  199599.8
##  [463]  328429.3  248505.2 1004470.1  418029.9  432756.9  434012.7  707333.8
##  [470]  632856.3  337698.3  363931.5 3043094.2  493004.0  282226.0  286372.9
##  [477]  627187.6  437467.0  413185.0  666950.4  355970.5  899881.7  533912.4
##  [484]  381265.0  375500.5  440838.9  901731.3  271277.6  438792.5 1807281.7
##  [491] 2066457.2  316642.8  247111.5  924533.7  353504.0  688878.3  414152.3
##  [498]  336217.5  901226.5  773916.1  541557.3  336519.9  801773.8  374190.5
##  [505]  346529.5  318708.1  473358.8  546329.2  294670.0  496501.4  347561.2
##  [512]  612589.0 1514504.3  473238.2 1150806.2  387084.7  346547.3  267162.4
##  [519]  518500.9  590295.4  201956.1  412280.2  422842.0  350095.3  630086.5
##  [526]  286300.0 3673393.2  280777.2  651366.3  417993.2  460470.3  345887.6
##  [533]  402573.0  413872.3  554749.7  765805.5  379541.6 1156133.2  410077.0
##  [540]  283391.2  426940.0  597906.3 1394323.0  483483.9  613997.2  385832.0
##  [547]  771747.6  331459.9  644739.0 1110058.3  375606.6 2029663.3  403060.0
##  [554] 1045294.3  393163.4 1609386.6  310941.1  544346.4  405657.3  290470.8
##  [561]  376709.0  254065.4 1231416.3  681120.4  265605.5 1209686.8  815107.3
##  [568]  397935.0  250443.6  486436.8  445519.1  456750.4  472240.1 1636060.1
##  [575]  332762.0  274703.7  282076.9  810607.0  408469.9  392044.4  325366.3
##  [582]  357989.0  672927.0  271490.3 1474661.7  258247.9  522192.7  331448.2
##  [589]  220036.9 1065544.0  405281.4  307252.7  988675.9  271030.2  648205.5
##  [596] 1218000.2  490828.9  456579.2  476127.7  321502.7  333780.8  507477.7
##  [603]  448575.4  351284.1  838734.8 1638641.4  531138.3  613630.3  438939.6
##  [610]  576392.6  399281.0  327778.4  447251.3  314506.4 1216023.3  310628.6
##  [617]  848248.0  474594.1  340411.2  487102.8  669507.0  401612.9  423527.3
##  [624]  440734.5  553471.7  174796.2  248214.9  414763.3  392334.4  674684.2
##  [631]  445182.3  276059.3  477577.6 1568154.5  618387.9  335483.2  245573.8
##  [638]  268508.4  502941.7  647756.6  322288.0  212235.1  302590.4  495106.6
##  [645]  582719.1  309742.7  502061.5  417403.2 1425766.6  782169.5  654829.5
##  [652]  360736.1  306846.1  489520.2  403393.2  255943.2 1012550.1  673438.1
##  [659]  365119.7  458826.6  326538.6 2052251.7  298214.6  324945.8  627184.3
##  [666]  365957.5  641387.7 2598578.5  519027.8  382016.6  970608.7  689966.3
##  [673] 1547024.7  515899.6  395935.9  318920.2 1608154.4  491639.4  252871.5
##  [680]  682101.6  506217.6  533307.0  428893.3  479111.7  484466.9  318354.9
##  [687]  478944.8  421292.3  434609.1  477054.9  428658.1  403225.3  553994.3
##  [694]  336249.4  333878.3  239472.5  836008.8  291437.3 2208719.6 2243427.2
##  [701]  426985.0  362127.0  380261.8  359202.9  457065.9  402097.0  321777.0
##  [708]  309061.5  555598.2  474453.8  246832.2  418845.6  525763.0  385556.6
##  [715]  388904.0  311716.4  871716.4  450868.1  402205.8  365016.1  353406.2
##  [722]  501249.2  396860.0  421763.1  533578.0  531371.8  268980.6  450909.8
##  [729]  352111.9  645390.1  376896.4  436009.0  538094.6  265729.8  593537.0
##  [736] 1560632.4  468788.9  540668.9  511699.9  366409.6  300997.9  606945.8
##  [743]  331830.9  334461.3  280674.0  416441.5  208008.0  308539.2 1331192.9
##  [750]  850389.4 1956909.8  459915.9  421894.1  472531.9  699349.8  422684.9
##  [757]  330225.7  718620.9  315987.1  393785.8  858647.9  410333.2  332174.3
##  [764]  426391.5  432278.3  911992.9  682243.7  718369.8  705839.5  581965.3
##  [771]  529582.2  651347.1  339115.2  775636.9  459523.0  384495.2 1821100.2
##  [778]  385618.8  355274.0  385711.9  452453.1  420278.2  449787.6  330573.9
##  [785] 2232944.1  606120.8  447538.5  479550.5  530350.8  376724.5  327018.5
##  [792]  765900.9  765866.9  900524.1  330110.6  445095.0  251027.3  853456.6
##  [799]  707668.2  512793.2  956456.4 1169069.6  383773.4  333792.8  588542.0
##  [806]  453463.6 1231711.6  428568.7 1918351.4  500081.0  626513.4  276095.4
##  [813]  480545.8  378341.7  282134.9  511811.9  694731.5  707627.0  469804.2
##  [820]  282607.2 1003166.6 3009493.2  198632.1 1251363.9  584873.5  467233.3
##  [827]  353099.5  499686.7  621131.3  460255.2  333609.0  524000.7  438917.3
##  [834]  336423.5  411212.4  665537.3  126381.8  489417.7  364574.6  559669.4
##  [841]  446461.2  331975.0  926793.4  348933.6  949910.7  513241.5  649436.8
##  [848]  401796.1  941032.0 1164894.4 1275868.4  614736.2  401232.9  488229.1
##  [855]  419960.2  429905.1  459356.4  273772.3  442251.3  305940.3  516137.9
##  [862]  508948.5 1885139.7  546635.0  496356.1  425944.6 1219039.2 1612067.5
##  [869]  251199.7  545879.8  636981.4  417111.9  290277.6  546174.8  375407.4
##  [876]  319630.9  450827.3  441065.2 1452800.2  647678.8  337814.1 1101529.8
##  [883]  298855.9  274546.9  288531.4  356454.2  388012.0  603110.0  356197.4
##  [890]  313782.7  668157.4  375597.9  568913.4  839627.7  775957.9  357640.9
##  [897]  462474.6  402862.6  614092.9  556189.4  471894.0  305843.2 1027491.0
##  [904]  345112.8  529415.4  465701.1  588986.5  245145.5  477828.3  240773.1
##  [911]  453475.1  301121.6  401996.7  293149.1  584159.6  289968.3 1079802.2
##  [918]  333312.1  652218.9  350782.8  366454.6  674099.3  283761.0  331746.0
##  [925]  339644.5  483826.8  475315.2  683521.8  383217.7  565569.6  254445.6
##  [932]  340908.7  696165.8  330918.2  268748.7  303429.0  371634.6 1213046.9
##  [939]  257043.1  754661.8  670674.1  488130.7  325366.9  346456.7  523738.4
##  [946]  309705.0  331372.8  518658.3  639557.2  486120.2  227701.7  334275.1
##  [953]  833063.0  471935.2  358034.2  328844.3  539899.3  452637.8  507020.1
##  [960]  252126.4  418981.0  389259.4 1282910.0  338800.9  888590.3  558279.8
##  [967]  378854.8 1418209.5 1656636.9 1277964.3  960151.9  815427.6  337877.0
##  [974]  348939.7  420449.1  417896.5  518137.5  745491.5  358922.3  980879.2
##  [981]  476863.4  413065.7  318756.9  658983.9 2155805.9  453837.1  483130.5
##  [988]  792466.2  364190.6  266274.7  960445.2  439749.5  562244.1 1738560.9
##  [995]  482105.6  403952.8  563940.7  253967.3  491346.8  520543.3  399642.3
## [1002]  638366.7  609420.1  474274.2  386948.6  331913.3  324158.1  275220.7
## [1009]  301572.6  789583.5  451754.2  352620.5  987586.4  350443.9  315203.4
## [1016]  422984.0  313663.1  346518.9  550843.2  314439.7  307694.4  428993.0
## [1023]  429562.9  380732.8  457358.4  591618.6 1742088.6  347381.3  344429.9
## [1030]  797528.9  425430.9 1269854.3  343507.8  262333.7  412714.4  462770.0
## [1037]  488354.1  252878.9  501278.3  370426.8  246416.5  634386.4  364692.1
## [1044]  317535.5  385210.2  543169.6  223118.9  267423.7  343577.6  350037.1
## [1051]  856068.6  415417.6  367095.8  585734.3  539849.9 1972940.4  415507.2
## [1058]  385592.4 2290239.8  381390.3 1414377.3  902088.8  400674.0  475806.0
## [1065]  591841.3  651369.1  410753.3 1348124.7  310866.6  376274.6 2140253.9
## [1072]  343466.1  284314.6  429774.9  348512.3 1022550.1  366277.7  597485.7
## [1079]  252688.5  400098.1  362371.5  485431.6  418662.0  765037.4  421862.7
## [1086]  584399.0  361722.2  796024.3  873556.4  256429.5  739062.4  438028.2
## [1093]  208671.3  224710.0  920635.5  535586.0  392126.9  338062.1  349251.8
## [1100]  229684.4  393766.7 1417137.6  725780.4  766613.1  363781.6  584717.1
## [1107]  533205.1 1638773.0  343788.4  674060.9  388837.6 2286205.7  725589.4
## [1114]  449604.7  362497.3  467763.3  612284.3  565969.1  379967.4  349410.4
## [1121]  316379.7  419679.7  244714.6  888450.3  624014.5  390210.6  384695.5
## [1128]  609450.6  504789.0  344826.6  538362.2  373010.1  251809.6  310377.1
## [1135] 1807394.1  653394.9  601264.9  651834.1  614163.9  304059.4  368954.3
## [1142]  390287.5  710153.1  225782.1  350230.2 1153728.1  464516.9  814204.3
## [1149]  276205.6  258343.6  691570.0 1193253.4  285070.9  380072.0  294720.7
## [1156] 2983921.3  409117.2  549017.7  396005.1  312883.5  776572.3  861717.6
## [1163]  480916.1  855360.2  265854.2  513616.3  434777.3  507344.2  493605.9
## [1170]  450378.2  778537.8  316976.6  444944.4  515472.5  695519.3  479083.8
## [1177]  269556.8  866590.0  546835.5  847071.3  635126.3  409149.3  444324.2
## [1184]  883582.8  475001.7  294626.5 1589299.7  316754.2  325824.2  244021.5
## [1191]  546077.1  242849.2  313261.1 1326415.2  386656.6 1584409.7  599805.4
## [1198]  447242.4  527432.9  468920.0  644652.5 1372339.3  398561.7  356688.5
## [1205]  553506.1  277804.1  803661.6  430125.3  736849.1  407619.5  274143.9
## [1212]  828668.9  291107.2  771673.5  255119.8  409877.1  577403.2  574074.4
## [1219]  453444.9  461272.7  497472.0  924989.8  310598.2  518586.9  395395.0
## [1226]  272736.3  904939.9  950268.6  438819.0  739853.2  409578.9  347960.6
## [1233]  319879.7  247195.9  333380.5 1036242.4  410849.9  542859.6  499313.8
## [1240]  313458.5 1531261.0  772719.6  442704.7  501905.4  956709.1  423021.3
## [1247]  331249.6  510037.3  947286.7  329838.3  726905.2  349838.4 1028606.3
## [1254]  391398.6  317647.4  893584.9  371710.2  943450.1  454107.1  563652.2
## [1261]  248389.5  458439.3  507684.5  390723.7  340673.9  468469.4  432703.2
## [1268]  517176.3  850976.4  291990.2  705638.7  341794.1  386415.8  535639.2
## [1275]  419827.6 3885929.1  344554.6  657479.1  451103.8  719542.8  337218.5
## [1282]  312944.2  270432.5  618871.6  433703.5 1375393.8  487576.7  637579.8
## [1289] 1532778.2  330462.8  383010.5  587517.8  440923.9  867587.0  704967.6
## [1296]  450973.3  533072.4  417385.7  557238.2 1250509.0  392949.3  343555.5
## [1303]  400488.9  275447.2  659892.8  609283.2  512054.3  262351.8 1111214.0
## [1310] 5243791.1  341138.8  314530.9  619872.1 1413268.0  430430.1  407702.4
## [1317] 1200441.4  622914.3  394023.5  680673.3  384204.9  407132.5  291137.1
## [1324]  760451.5  717876.8  737046.8  766602.6  431602.3  942575.7  509202.3
## [1331]  318796.8  439604.4  349043.1  880337.2  308598.9  307446.6  334741.5
## [1338]  351080.1 1445983.6  639717.3  455135.5 2418336.2  315215.3  750322.5
## [1345]  719025.5  762520.2  636300.7  440967.0  425797.2  391124.5  572786.9
## [1352]  310242.9  661431.7  337722.8  404931.3  974489.1  435306.3  475545.5
## [1359] 2168933.5  519926.2  983203.8  576677.5  383142.1  292811.4  364885.0
## [1366]  310672.8  385746.8  266515.4  389433.3  991054.0  416351.0 1014175.9
## [1373]  263882.5  293206.3  690617.4  527489.2  441863.0  338452.9  423668.7
## [1380]  455920.9  291437.3 1188449.3  273984.7  314733.3  332124.2  426952.4
## [1387] 1043598.3  424350.3  425753.9 1156511.9 1888458.9  489281.2  230512.3
## [1394]  402188.7  563165.1  360435.8  345285.9  447660.9  503711.6  643158.7
## [1401]  327521.5  359552.7  411068.3  494297.9  391935.1  811897.7  214702.0
## [1408]  649286.0  426801.8  901923.7  348106.6  263328.1  297405.4  340690.0
## [1415] 1273380.8  417760.4  345354.1  849488.8 2135605.7  322120.7  432310.6
## [1422]  362459.4  343303.4  575217.5  545344.1  373245.1  599164.0  283348.6
## [1429]  296141.2  414095.1 1411578.3  348191.7  755563.9  365881.9  326204.4
## [1436]  360813.2  314882.3  585812.5  394946.3  952453.1  439390.0  295394.3
## [1443] 2538361.8  262032.6  416627.2 1342184.8  883679.9  231392.9  350103.4
## [1450] 1483429.5  310738.6  310448.8  357637.4  487788.9  434721.1  399464.2
## [1457]  497463.4  467935.0 1181841.3  397324.0  527638.5  369597.1  393819.7
## [1464]  474295.4  547244.9  398588.5  968438.1  401243.9 1581643.8  380458.9
## [1471] 1526165.6  657338.6 2965385.9  464678.1  442027.6 1107659.7  282785.7
## [1478]  378775.7  424949.8  325198.5  323276.8  290510.8  341947.2  643156.1
## [1485]  821815.2 1077921.1  289190.1  493986.8  322104.9  320368.0  776820.6
## [1492]  337798.6  568916.5  371636.2  506103.4  371373.7  422799.8  358488.5
## [1499]  454407.3  306713.5  220871.9  390151.3 1249446.0  368233.6  885794.6
## [1506]  298760.4  401613.6  457784.7  313441.8  892727.3  446224.0  261368.1
## [1513] 1104416.8  580781.5  601668.6  310088.6  366050.7  442404.7  333778.8
## [1520]  381475.7  216934.0  813363.9  914506.5  495096.9  334593.9  835385.9
## [1527]  464213.2  320743.4  481554.1 1224411.4  934875.9  269843.0  390959.0
## [1534] 1965905.1  533193.0  951547.0  336067.7  865748.5  453946.8  386736.8
## [1541]  271078.2  327246.8  463395.2  551138.6  398926.7  220400.8  438386.9
## [1548]  275339.1  430776.3 1248463.0  464354.2  360740.1  429114.8  274231.6
## [1555]  357041.0  369400.8  574124.0  582962.6  417060.4 2178388.6 1438276.1
## [1562]  817333.7 2320163.2  320080.1  485645.1  368954.5  332001.1  391619.4
## [1569]  358175.0  359697.6  326433.8  307913.3  644114.4  373076.1  347500.9
## [1576]  273995.9 1120703.7  394855.4  433480.6  358225.0  579182.9  621083.7
## [1583] 1318793.7  710316.2  448933.9  336620.7  414667.5  747732.8  461901.9
## [1590] 9066238.8  293694.3 1440169.6  375220.7  551054.0  690642.6  866746.4
## [1597]  261608.7  367174.9  487273.5  407931.1  290751.4  722796.4  865569.9
## [1604] 1093641.1  412845.2  581617.1  348281.3  611421.9  665518.9  441947.4
## [1611]  772232.2 1125361.1 1246799.8  400780.2  480455.5 1791418.7  422843.3
## [1618] 1137142.0  329595.0  337422.2  954841.0  536036.9 1251008.2  628398.4
## [1625]  390500.6  701848.6  369948.9  648646.1  511642.7  344129.1  475412.8
## [1632]  439324.5  651720.9  552739.9  599600.8  561173.2  407261.0  555295.7
## [1639]  449355.3  273611.9  497760.1  949101.8  332450.3  802496.7  404212.1
## [1646]  343362.4  401303.9  636425.2  659908.2  279015.5  467760.4  570255.3
## [1653]  411256.7  589606.8  245216.6  271640.3  241306.5  475409.1  408086.8
## [1660]  458919.6  382418.9  414531.1  382773.1  726700.1  356130.7  470373.7
## [1667]  328216.8  293138.7  252928.9  766213.4  250370.9  525835.7  382331.1
## [1674]  698206.8  477206.2 1346537.5  408794.5  654620.4 1233793.1  672836.7
## [1681]  352711.7  285176.4  509361.4  326801.3  986679.2 1305134.8  883445.5
## [1688]  501457.2  476648.2  400560.1  267616.4  419169.6  593810.5  188880.7
## [1695]  729599.2  279218.0 3078313.2  354667.3  454753.5  527905.1 1372329.8
## [1702]  505391.2  410444.7 3178461.6  397416.6  372008.7  318669.5  543758.0
## [1709]  625722.3  350055.8  352687.1 1019398.6  386092.6  286371.5  219618.4
## [1716] 1369332.0  328979.3  418578.6  287188.6  467827.5  645356.4 1322924.8
## [1723]  540960.3  278871.6  610118.8  183215.1  439734.2  367211.8 1809007.6
## [1730]  539354.9  498329.3  620419.3  403914.9  473315.5  324585.5  345409.1
## [1737]  372658.4  400641.0  473121.3  432112.0  452665.9  511056.0  383316.3
## [1744]  368628.9  652549.2  395324.9  307629.7  842886.7  281686.2 1523207.4
## [1751]  603233.6  365427.7  409753.6  379994.5  266655.7  271135.9  366159.1
## [1758]  452123.6  601885.3  922193.2  334705.4  323442.4  434010.9  264855.7
## [1765]  514822.0  577231.0  426759.4  583402.1  318104.7  517810.1  322612.9
## [1772]  378288.1  250034.5  471398.4 1032354.9  413048.7  288009.7  435316.3
## [1779]  295036.4  800710.9  378467.5  625909.0  309368.9  421766.3  355328.3
## [1786] 1161779.2  618780.2  419860.8 1719764.3  346559.5  413960.9  371677.2
## [1793]  693699.7  849751.6  419557.1 1413306.6  399331.4  331660.9  430230.6
## [1800]  624649.7  398358.4  391471.7  652253.4  382343.0  412139.3  509026.1
## [1807]  838651.1  585708.1  449487.0  389660.1  354640.7  294898.2  450237.9
## [1814]  697382.1  337645.9  413492.9  495213.0  906761.2  290507.2  323952.9
## [1821]  237913.6  388528.2  407169.6  695815.2  516548.5  613010.8  415192.0
## [1828]  435661.3  469475.8  402556.1  485902.4  416766.6  493745.1 2102127.7
## [1835]  691917.0  487997.7  341671.3  405562.2  291435.1  786567.8  376077.6
## [1842]  435274.1  348199.7  349227.9 1094768.2  461664.0  389550.8 1409702.9
## [1849] 3888086.9 1396077.4  769454.2  924684.5  349520.1 1141665.4  224862.6
## [1856] 1181777.4 2296568.3  479770.6  411485.8  707882.8  395227.8  431183.7
## [1863]  482515.4  356266.4  889083.8  607356.8  480666.1  393077.3  234670.5
## [1870]  406005.4 1026533.4  534039.2  708890.9  328608.3  222198.8  266482.9
## [1877] 1261887.4  388662.9  325743.9  793681.9  465965.4  736151.3  295733.4
## [1884]  732813.8  490047.9 1060574.4  750470.7  398753.4  289297.0  277476.6
## [1891]  726362.9  784699.0  342888.7  348703.7  316820.3  319213.1  959820.9
## [1898]  326072.9  281497.2  322870.9  386550.3  465574.3  567362.2  747396.2
## [1905]  284123.4  377522.9  369854.6  478735.9  581848.8  473446.5  427099.9
## [1912]  318492.7  344618.9  391917.6  280426.4  955021.8  421658.3  392796.4
## [1919]  417164.2  469075.1  676711.1 1135803.6  939656.0 1126987.4  458920.4
## [1926]  393038.8  248004.9  763558.1  295410.6  669937.3  378507.6  422885.4
## [1933]  470527.8  472236.7  460692.2  966261.7  620883.0  368139.3  447594.0
## [1940]  322642.3  431292.4  346518.3  321545.0  248108.7  437457.7  676038.2
## [1947]  421958.0  589680.5  595917.3  567820.5  420656.8  414966.6  607231.7
## [1954]  674084.8  815537.5  368632.7  360034.3  761989.3  581457.9  418074.2
## [1961]  272900.7 1835035.5  340352.7  500084.6 1346547.8  577365.3  515369.5
## [1968]  409667.0  414047.5  286524.9  278784.1  309390.2  501577.6  365015.2
## [1975]  362179.8  253538.9  337784.7  780226.3  318566.9  427843.3  429396.0
## [1982]  316472.2 1512501.0  386087.1  594203.3  445518.0  291725.0  503167.1
## [1989]  921803.2  967154.3  811225.4  392957.6

7 Stacking

Use stacking to combine your algorithms into an ensemble: train a "combiner" model whose inputs are the predictions of the individual models.

set.seed(1) # to return the same result in the following chunks

# Level-0 predictions: score the training data with each base learner.
train_predict_lm <- predict(model1_lm, train_data)
train_predict_tree <- predict(model2_tree, train_data)
train_predict_knn <- predict(model3_knn, train_data)
train_predict_brnn <- predict(model4_brnn3, train_data)

# Build the combiner's training set with data.frame() rather than
# as.data.frame(cbind(...)): cbind() coerces every column through a single
# matrix (silently unifying types) and leaves the target named "V5",
# forcing a post-hoc rename. data.frame() keeps each column's type and
# lets us name the target column up front. Column names of the four
# prediction columns are unchanged, since later chunks rely on
# colnames(stacking_train)[1:4].
stacking_train <- data.frame(
  train_predict_lm,
  train_predict_tree,
  train_predict_knn,
  train_predict_brnn,
  price = train_data$price
)

#we are going to train the combiner model and report the results using k-fold cross validation
combiner <- train(
  price ~ .,
  stacking_train,
  method = "lm",
  trControl = control, # NOTE(review): `control` is defined in an earlier chunk — presumably a caret trainControl with 5-fold CV, per the fold output below
  metric = "RMSE")
## + Fold1: intercept=TRUE 
## - Fold1: intercept=TRUE 
## + Fold2: intercept=TRUE 
## - Fold2: intercept=TRUE 
## + Fold3: intercept=TRUE 
## - Fold3: intercept=TRUE 
## + Fold4: intercept=TRUE 
## - Fold4: intercept=TRUE 
## + Fold5: intercept=TRUE 
## - Fold5: intercept=TRUE 
## Aggregating results
## Fitting final model on full training set
# In-sample fit of the stacked model: predict on the same frame the
# combiner was trained on, then summarise with RMSE and R-squared.
combiner_train_pred <- predict(combiner, stacking_train)
stacking_train$train_predictions <- combiner_train_pred

stacking_train_result <- data.frame(
  RMSE    = RMSE(combiner_train_pred, stacking_train$price),
  Rsquare = R2(combiner_train_pred, stacking_train$price)
)

stacking_train_result # performance of combiner model on training data
##       RMSE   Rsquare
## 1 194822.8 0.8623335
# Level-0 predictions on the held-out test set from each base learner.
test_predict_lm <- predict(model1_lm, test_data)
test_predict_tree <- predict(model2_tree, test_data)
test_predict_knn <- predict(model3_knn, test_data)
test_predict_brnn <- predict(model4_brnn3, test_data)

# Assemble the predictions and relabel the columns to match the names the
# combiner was trained on, so predict() finds the predictors it expects.
stacking_test <- data.frame(
  test_predict_lm,
  test_predict_tree,
  test_predict_knn,
  test_predict_brnn
)
colnames(stacking_test) <- colnames(stacking_train)[1:4]

# Stacked prediction for each test house.
stacking_test$test_predictions <- predict(combiner, stacking_test)

# Out-of-sample performance of the stacked model.
stacking_test_result <- data.frame(
  RMSE    = RMSE(stacking_test$test_predictions, test_data$price),
  Rsquare = R2(stacking_test$test_predictions, test_data$price)
)

stacking_test_result # performance of combiner model on testing data
##       RMSE   Rsquare
## 1 217744.1 0.8134421

8 Pick investments

In this section you should use the best algorithm you identified to choose 200 properties from the out of sample data.

set.seed(1) # to return the same result in the following chunks

numchoose <- 200 # number of houses to invest in

oos <- london_house_prices_2019_out_of_sample

# Level-0 predictions from each base learner on the out-of-sample houses.
oos_predict_lm <- predict(model1_lm, oos)
oos_predict_tree <- predict(model2_tree, oos)
oos_predict_knn <- predict(model3_knn, oos)
oos_predict_brnn <- predict(model4_brnn3, oos)

# Assemble the predictions under the column names the combiner was trained
# on, then compute the stacked price prediction for every house.
stacking_oos <- data.frame(
  oos_predict_lm,
  oos_predict_tree,
  oos_predict_knn,
  oos_predict_brnn
)
colnames(stacking_oos) <- colnames(stacking_train)[1:4]

#predict the value of houses
oos$predict <- predict(combiner, stacking_oos)

# Rank houses by expected return relative to the asking price and keep the
# top `numchoose` as the investment picks.
selection <- oos %>%
  mutate(profit = (predict - asking_price) / asking_price) %>%
  slice_max(profit, n = numchoose) %>% # Choose the 200 houses to invest
  select(ID)

selection$buy <- 1

# Re-read the raw out-of-sample file so the submission keeps the original
# columns and formats, then flag chosen houses with buy = 1, all others 0.
oos <- read.csv("test_data_assignment.csv")

oos <- oos %>%
  left_join(selection, by = "ID")

oos$buy[is.na(oos$buy)] <- 0

#output your choices. Change the name of the file to your "lastname_firstname.csv"
# row.names = FALSE: write.csv()'s default writes R's row numbers as an
# extra unnamed first column, which would corrupt the submission format.
write.csv(oos, "Ding_Linli.csv", row.names = FALSE)